Example #1
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    overlaps = box_utils.bbox_overlaps(
        boxes.astype(dtype=np.float32, copy=False),
        boxes.astype(dtype=np.float32, copy=False))

    return (overlaps > iou_threshold).astype(np.float32)
def get_labels(model, i):
    workspace.ResetWorkspace()
    workspace.RunNetOnce(model.param_init_net)
    #print(str(model.param_init_net.Proto()))
    #with open(os.path.join(os.getcwd(), "train_net.pbtxt"), 'w') as fid:
    #    fid.write(str(model.net.Proto()))
    #with open(os.path.join(os.getcwd(), "train_init_net.pbtxt"), 'w') as fid:
    #    fid.write(str(model.param_init_net.Proto()))
    roidb = workspace.FetchBlob(core.ScopedName("roidb"))
    for entry in roidb:
        print("roidb: ", entry.keys())

    label_boxes = workspace.FetchBlob(core.ScopedName("labels_int32"))
    gt_boxes = workspace.FetchBlob(core.ScopedName("bbox_targets"))
    pred_boxes = workspace.FetchBlob(
        core.ScopedName('bbox_pred_stage_' + str(i + 1)))
    num_inside = pred_boxes.shape[0]

    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(0)
    if len(gt_boxes) > 0:
        # Compute overlaps between the predicted boxes and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(pred_boxes, gt_boxes)
        # Map from each predicted box to the gt box with highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each predicted box, amount of overlap with its best gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]
        # Fg label: take the label of the gt box with highest overlap
        labels = np.array([label_boxes[j] for j in anchor_to_gt_argmax],
                          dtype=np.int32)
    workspace.FeedBlob(core.ScopedName("labels_stage_" + str(i + 1)), labels)
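A minimal standalone sketch of the IoU-graph idea from _build_graph above, using a plain NumPy stand-in for box_utils.bbox_overlaps (the stand-in uses the continuous-box convention, not Detectron's inclusive +1 pixel convention):

import numpy as np

def iou_matrix(a, b):
    # Pairwise IoU for xyxy boxes; a: (N, 4), b: (K, 4).
    x1 = np.maximum(a[:, None, 0], b[None, :, 0])
    y1 = np.maximum(a[:, None, 1], b[None, :, 1])
    x2 = np.minimum(a[:, None, 2], b[None, :, 2])
    y2 = np.minimum(a[:, None, 3], b[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)

boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], np.float32)
adjacency = (iou_matrix(boxes, boxes) > 0.5).astype(np.float32)
print(adjacency)  # boxes 0 and 1 are connected; box 2 only to itself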
Example #3
def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds,
            0] = (1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds,
            1:] = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                               cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
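For reference, a hedged sketch of the standard Faster R-CNN (tx, ty, tw, th) parameterization that box_utils.bbox_transform_inv presumably computes for the targets above; the helper name encode_deltas and the unit regression weights are illustrative only:

import numpy as np

def encode_deltas(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    # Standard parameterization, assuming the inclusive +1 pixel convention.
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = weights
    tx = wx * (gt_cx - ex_cx) / ex_w
    ty = wy * (gt_cy - ex_cy) / ex_h
    tw = ww * np.log(gt_w / ex_w)
    th = wh * np.log(gt_h / ex_h)
    return np.stack([tx, ty, tw, th], axis=1)

ex = np.array([[0., 0., 10., 10.]])
gt = np.array([[2., 2., 12., 12.]])
print(encode_deltas(ex, gt))  # roughly [[0.18, 0.18, 0., 0.]]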
Example #4
def compute_depth_targets(entry):
    """Compute centroid depth regression targets for an image."""
    # Indices of ground-truth distances from bbox centroid to camera

    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]

    # Targets has format (class, depth)
    targets = np.zeros((labels.shape[0], 2), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_depths = entry['depths'][gt_inds, :]

    targets[ex_inds,
            0] = (1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = gt_depths[gt_assignment, :]

    return targets
Example #5
def compute_bbox_regression_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (
        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
Example #6
def _merge_proposal_boxes_into_roidb(roidb, box_list, score_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        scores = score_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(entry['boxes'],
                                   boxes.astype(entry['boxes'].dtype,
                                                copy=False),
                                   axis=0)
        entry['obn_scores'] = np.append(entry['obn_scores'],
                                        scores.astype(
                                            entry['obn_scores'].dtype,
                                            copy=False),
                                        axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))
Example #7
    def forward(self, inputs, outputs):
        """See modeling.detector.AddBBoxAccuracy for inputs/outputs
        documentation.
        """

        # predicted bbox deltas
        bbox_deltas = inputs[0].data
        # proposals
        bbox_data = inputs[1].data
        assert bbox_data.shape[1] == 5
        bbox_prior = bbox_data[:, 1:]
        # labels
        labels = inputs[2].data
        # mapped gt boxes
        mapped_gt_boxes = inputs[3].data
        gt_boxes = mapped_gt_boxes[:, :4]
        max_overlap = mapped_gt_boxes[:, 4]

        # bbox iou only for fg and non-gt boxes
        keep_inds = np.where((labels > 0) & (max_overlap < 1.0))[0]
        num_boxes = keep_inds.size
        bbox_deltas = bbox_deltas[keep_inds, :]
        bbox_prior = bbox_prior[keep_inds, :]
        labels = labels[keep_inds]
        gt_boxes = gt_boxes[keep_inds, :]
        max_overlap = max_overlap[keep_inds]

        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_boxes == 0:
            bbox_deltas = bbox_deltas[:, -4:]
        else:
            bbox_deltas = np.vstack([
                bbox_deltas[i, labels[i] * 4:labels[i] * 4 + 4]
                for i in range(num_boxes)
            ])
        pred_boxes = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                              self._bbox_reg_weights)

        avg_iou = 0.
        pre_avg_iou = sum(max_overlap)
        for i in range(num_boxes):
            gt_box = gt_boxes[i, :]
            pred_box = pred_boxes[i, :]
            tmp_iou = box_utils.bbox_overlaps(
                gt_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
                pred_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
            )
            avg_iou += tmp_iou[0]
        if num_boxes > 0:
            avg_iou /= num_boxes
            pre_avg_iou /= num_boxes
        outputs[0].reshape([1])
        outputs[0].data[...] = avg_iou
        outputs[1].reshape([1])
        outputs[1].data[...] = pre_avg_iou
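A small sketch of the class-specific delta slicing used above when cfg.MODEL.CLS_AGNOSTIC_BBOX_REG is off; shapes and values are illustrative only:

import numpy as np

num_classes = 3
bbox_deltas = np.arange(2 * 4 * num_classes, dtype=np.float32).reshape(2, -1)
labels = np.array([2, 1])  # assigned class per box

per_class = np.vstack([
    bbox_deltas[i, labels[i] * 4:labels[i] * 4 + 4]
    for i in range(len(labels))
])
print(per_class)  # row 0: values 8..11, row 1: values 16..19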
Example #8
def _do_test(b1, b2):
    # Compute IoU overlap with the cython implementation
    cython_iou = box_utils.bbox_overlaps(b1, b2)
    # Compute IoU overlap with the COCO API implementation
    # (requires converting boxes from xyxy to xywh format)
    xywh_b1 = box_utils.xyxy_to_xywh(b1)
    xywh_b2 = box_utils.xyxy_to_xywh(b2)
    not_crowd = [int(False)] * b2.shape[0]
    coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd)
    # IoUs should be similar
    np.testing.assert_array_almost_equal(
        cython_iou, coco_ious, decimal=5
    )
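A hedged sketch of the xyxy to xywh conversion the COCO API expects, assuming the inclusive pixel convention (width = x2 - x1 + 1) that Detectron-style box_utils.xyxy_to_xywh appears to use:

import numpy as np

def xyxy_to_xywh(boxes):
    # (x1, y1, x2, y2) -> (x1, y1, w, h), inclusive pixel convention assumed.
    boxes = np.asarray(boxes, dtype=np.float32)
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))

print(xyxy_to_xywh(np.array([[10, 10, 20, 30]])))  # [[10. 10. 11. 21.]]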
Example #9
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg_TRAIN_FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg_TRAIN_FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg_TRAIN_BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)

    for i in xrange(gt_boxes.shape[0]):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs, pc_count, img_cls_loss_weights
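A toy run of the cluster bookkeeping in the loop above, with made-up proposals assigned to two gt "cluster centres":

import numpy as np

gt_assignment = np.array([0, 0, 1, -1])          # proposal -> gt cluster (-1 = bg)
cls_loss_weights = np.array([0.9, 0.5, 0.7, 0.2])
cls_prob = np.array([[0.1, 0.8], [0.3, 0.6], [0.9, 0.1], [0.5, 0.5]])
gt_labels = np.array([[1], [0]])                 # class of each gt cluster

for i in range(2):
    members = np.where(gt_assignment == i)[0]
    print(i,
          cls_loss_weights[members].sum(),            # img_cls_loss_weights[i]
          len(members),                               # pc_count[i]
          cls_prob[members, gt_labels[i, 0]].mean())  # pc_probs[i]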
Example #10
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    IsFlipped = roidb['flipped']
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE
    #
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i,:] for i in polys_gt_inds]
    if not(boxes_from_polys):
        pass
    else:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros( blobs['labels_int32'].shape )

    if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0) ):
        rois_fg = sampled_boxes[fg_inds]
        #
        rois_fg.astype(np.float32, copy=False)
        boxes_from_polys.astype(np.float32, copy=False)
        #
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value>0.7]

    if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0) ):
        for jj in fg_inds:
            roi_has_mask[jj] = 1
         
        # Create blobs for densepose supervision.
        ################################################## The mask
        All_labels = blob_utils.zeros((fg_inds.shape[0], M ** 2), int32=True)
        All_Weights = blob_utils.zeros((fg_inds.shape[0], M ** 2), int32=True)
        ################################################# The points
        X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        #################################################

        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            #
            fg_polys_ind = polys_gt_inds[ fg_polys_inds[i] ]
            #
            Ilabel = segm_utils.GetDensePoseMask( roidb['dp_masks'][ fg_polys_ind ] )
            #
            GT_I = np.array(roidb['dp_I'][ fg_polys_ind ])
            GT_U = np.array(roidb['dp_U'][ fg_polys_ind ])
            GT_V = np.array(roidb['dp_V'][ fg_polys_ind ])
            GT_x = np.array(roidb['dp_x'][ fg_polys_ind ])
            GT_y = np.array(roidb['dp_y'][ fg_polys_ind ])
            GT_weights = np.ones(GT_I.shape).astype(np.float32)
            #
            ## Do the flipping of the densepose annotation !
            if(IsFlipped):
                GT_I,GT_U,GT_V,GT_x,GT_y,Ilabel = DP.get_symmetric_densepose(GT_I,GT_U,GT_V,GT_x,GT_y,Ilabel)
            #
            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i],:]
            #
            x1 = roi_fg[0]  ;   x2 = roi_fg[2]
            y1 = roi_fg[1]  ;   y2 = roi_fg[3]
            #
            x1_source = roi_gt[0];  x2_source = roi_gt[2]
            y1_source = roi_gt[1];  y2_source = roi_gt[3]
            #
            x_targets  = ( np.arange(x1,x2, (x2 - x1)/M ) - x1_source ) * ( 256. / (x2_source-x1_source) )  
            y_targets  = ( np.arange(y1,y2, (y2 - y1)/M ) - y1_source ) * ( 256. / (y2_source-y1_source) )  
            #
            x_targets = x_targets[0:M] ## Strangely sometimes it can be M+1, so make sure size is OK!
            y_targets = y_targets[0:M]
            #
            [X_targets,Y_targets] = np.meshgrid( x_targets, y_targets )
            New_Index = cv2.remap(Ilabel,X_targets.astype(np.float32), Y_targets.astype(np.float32), interpolation=cv2.INTER_NEAREST, borderMode= cv2.BORDER_CONSTANT, borderValue=(0))
            #
            All_L = np.zeros(New_Index.shape)
            All_W = np.ones(New_Index.shape)
            #
            All_L = New_Index
            #
            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source
            #
            GT_y =  ((  GT_y / 256. * gt_length_y  ) + y1_source - y1 ) *  ( M /  ( y2 - y1 ) )
            GT_x =  ((  GT_x / 256. * gt_length_x  ) + x1_source - x1 ) *  ( M /  ( x2 - x1 ) )
            #
            GT_I[GT_y<0] = 0
            GT_I[GT_y>(M-1)] = 0
            GT_I[GT_x<0] = 0
            GT_I[GT_x>(M-1)] = 0
            #
            points_inside = GT_I>0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_weights = GT_weights[points_inside]
            GT_I = GT_I[points_inside]
            #
            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V
            Uv_point_weights[i, 0:len(GT_weights)] = GT_weights
            #
            All_labels[i, :] = np.reshape(All_L.astype(np.int32), M ** 2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M ** 2)
            ##
    else:
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        #
        if(len(bg_inds)==0):
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))

        roi_has_mask[0] = 1
        #
        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1,196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        Uv_point_weights = blob_utils.zeros((1, 196), int32=False)
        #
        All_labels = -blob_utils.ones((1, M ** 2), int32=True) * 0 ## zeros
        All_Weights = -blob_utils.ones((1, M ** 2), int32=True) * 0 ## zeros
    #
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))
    #
    K = cfg.BODY_UV_RCNN.NUM_PATCHES
    #
    U_points = np.tile( U_points , [1,K+1] )
    V_points = np.tile( V_points , [1,K+1] )
    Uv_Weight_Points = np.zeros(U_points.shape)
    #
    for jjj in xrange(1,K+1):
        Uv_Weight_Points[ : , jjj * I_points.shape[1]  : (jjj+1) * I_points.shape[1] ] = ( I_points == jjj ).astype(np.float32)
    #
    ################
    # Update blobs dict with Mask R-CNN blobs
    ###############
    #
    blobs['body_uv_rois'] = np.array(rois_fg)
    blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    ##
    blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    #
    ##########################
    blobs['body_uv_X_points'] = X_points.astype(np.float32)
    blobs['body_uv_Y_points'] = Y_points.astype(np.float32)
    blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['body_uv_I_points'] = I_points.astype(np.float32)
    blobs['body_uv_U_points'] = U_points.astype(np.float32)  #### VERY IMPORTANT :   These are switched here :
    blobs['body_uv_V_points'] = V_points.astype(np.float32)
    blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
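A hedged numeric sketch of the coordinate mapping used above: a DensePose point annotated in the 256x256 frame of its gt box is first mapped back to image coordinates and then into the M x M heatmap frame of the sampled roi. The box values are made up:

import numpy as np

M = 56
roi_gt = np.array([100., 100., 200., 200.])    # x1, y1, x2, y2 of the gt box
roi_fg = np.array([110., 110., 210., 210.])    # a sampled roi overlapping it

gt_x = np.array([128.])                        # point at the centre of the gt box
gt_length_x = roi_gt[2] - roi_gt[0]            # 100
x_img = gt_x / 256. * gt_length_x + roi_gt[0]  # 150 in image coordinates
x_hm = (x_img - roi_fg[0]) * (M / (roi_fg[2] - roi_fg[0]))
print(x_hm)                                    # [22.4], i.e. column ~22 of the heatmap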
Example #12
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width,
                         im_height):
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets,
                                    total_anchors,
                                    inds_inside,
                                    fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        M = len(inds_4d)
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
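A hedged stand-in for what data_utils.unmap presumably does in the RetinaNet blobs function above: scatter values computed on a subset of anchors back onto the full anchor set, filling the rest with a constant:

import numpy as np

def unmap(data, count, inds, fill=0):
    # Scatter `data` (defined on the anchors at `inds`) into an array of
    # length `count`, filling unmapped positions with `fill`.
    if data.ndim == 1:
        out = np.full((count, ), fill, dtype=data.dtype)
        out[inds] = data
    else:
        out = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
        out[inds, :] = data
    return out

labels_inside = np.array([1, 0, -1], dtype=np.float32)
inds_inside = np.array([0, 2, 4])
print(unmap(labels_inside, 6, inds_inside, fill=-1))
# [ 1. -1.  0. -1. -1. -1.]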
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        # rois
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        # (num boxes, num class + 1)
        gt_overlaps = np.zeros(
            (num_boxes, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        # (num boxes,)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        # gt_classes marks which class each gt box belongs to; use it to find the indices of all gt boxes
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            # gt box
            gt_boxes = entry['boxes'][gt_inds, :]
            # gt class
            gt_classes = entry['gt_classes'][gt_inds]

            # Compute the IoU between the proposal boxes and the gt boxes
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
            )
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            # For each box, find the gt box with the highest IoU
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]

            # Record max overlaps with the class of the appropriate gt box
            # Fill in gt_overlaps for the proposal boxes
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            # Index of the gt box matched to each box (default -1)
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]

        # Append the new boxes
        entry['boxes'] = np.append(
            entry['boxes'],
            boxes.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )

        # Append zeros, so any entry greater than 0 is a gt box
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype)
        )

        # Append zeros
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype)
        )

        # Append the IoU of each box with the gt boxes
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])

        # Append zeros
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype)
        )

        # Append the index of the gt box matched to each box
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(
                entry['box_to_gt_ind_map'].dtype, copy=False
            )
        )
def _merge_proposal_boxes_into_roidb(roidb, box_list, model=None):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):

        boxes = box_list[i]

        if cfg.TRAIN.DOMAIN_ADAPTATION:
            rois_per_image = min(len(boxes), int(cfg.TRAIN.BATCH_SIZE_PER_IM))
            entry['da_boxes'] = np.array(boxes[:rois_per_image],
                                         dtype=np.float32)
            if not entry['is_source']:
                weight = model.class_weight_db.get_avg_pada_weight()
                ims = cfg.TRAIN.IMS_PER_BATCH
                source_imgs = ims - ims // 2
                target_imgs = ims // 2
                weight *= source_imgs / target_imgs
                entry['pada_roi_weights'] = np.full(rois_per_image,
                                                    weight,
                                                    dtype=np.float32)
                # print('pada_dc_target_weights:',rois_per_image*weight)
                continue  # we do not supervise on target set rois.

        num_boxes = boxes.shape[0]  # the rpn_rois for this image (entry)
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(entry['boxes'],
                                   boxes.astype(entry['boxes'].dtype,
                                                copy=False),
                                   axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))

        # for DA:
        if cfg.TRAIN.PADA and cfg.TRAIN.DOMAIN_ADAPTATION:
            # we pre-calculate the weights here

            # keep_inds = np.arange(rois_per_image)

            # boxes = boxes[:rois_per_image] # already submitted as 'da_boxes'
            maxes = maxes[:rois_per_image]
            argmaxes = argmaxes[:rois_per_image]
            labels = gt_classes[argmaxes]
            assert (labels[maxes > 0] != 0).all()

            # model.class_weight_db.set_maxes(maxes)

            class_weights = model.class_weight_db.class_weights

            # Each roi has a fg and a bg part. The split between these parts is determined by the IoU (scores).
            # The fg part is weighted by PADA with the corresponding class weights, and the bg part is set
            # to be on average 75% of the total weight: w_pada * fg + bg.
            # The bg weights can also be less than 75% if there are not many bg rois, because w_bg may be at most 1.0 per roi.

            pada_weights = maxes * class_weights[labels]
            # pada_fg_weight = pada_weights.sum()
            # fg_weight = maxes.sum()
            # avg_pada_roi_weight = pada_fg_weight / (fg_weight + np.finfo(float).eps)
            # avg_pada_roi_weight = model.class_weight_db.update_get_avg_pada_weight(avg_pada_roi_weight,fg_weight)
            avg_pada_roi_weight = model.class_weight_db.get_avg_pada_weight()
            bg_weights = (
                1 - maxes
            ) * avg_pada_roi_weight  # scale bg rois similar as average fg scale.
            box_weights = pada_weights + bg_weights  # each roi is both partially fg and bg, weighted by IoU.

            entry['pada_roi_weights'] = np.array(box_weights, dtype=np.float32)
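A hedged numeric sketch of the roi weight composition described in the comments above, with made-up IoUs, labels and class weights:

import numpy as np

maxes = np.array([0.9, 0.3, 0.0])          # IoU of each roi with its matched gt
labels = np.array([3, 1, 0])               # matched gt class (0 = background)
class_weights = np.array([1.0, 0.5, 0.8, 0.2])
avg_pada_roi_weight = 0.4

pada_weights = maxes * class_weights[labels]          # fg part
bg_weights = (1 - maxes) * avg_pada_roi_weight        # bg part
box_weights = pada_weights + bg_weights
print(box_weights)  # [0.9*0.2 + 0.1*0.4, 0.3*0.5 + 0.7*0.4, 0.0 + 1.0*0.4]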
Example #15
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
    )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
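A toy illustration of the inside/outside weighting commented above, for five anchors labelled positive, negative or ignore:

import numpy as np

labels = np.array([1, 0, -1, 1, 0])
num_inside = len(labels)

bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = 1.0   # regression loss only on positives

bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
num_examples = np.sum(labels >= 0)          # 4 sampled anchors
bbox_outside_weights[labels >= 0, :] = 1.0 / num_examples

print(bbox_inside_weights[:, 0])   # [1. 0. 0. 1. 0.]
print(bbox_outside_weights[:, 0])  # [0.25 0.25 0.   0.25 0.25]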
Example #16
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    IsFlipped = roidb['flipped']
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE
    #
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    input_w = roidb['input_width']
    input_h = roidb['input_height']
    if not (boxes_from_polys):
        pass
    else:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)):
        rois_fg = sampled_boxes[fg_inds]
        #
        rois_fg.astype(np.float32, copy=False)
        boxes_from_polys.astype(np.float32, copy=False)
        #
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]
    all_person_masks = np.zeros((int(input_h), int(input_w)), dtype=np.float32)
    if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)):
        # control the number of rois
        if fg_inds.shape[0] > 6:
            fg_inds = fg_inds[:6]
        for jj in fg_inds:
            roi_has_mask[jj] = 1

        # Create blobs for densepose supervision.
        ################################################## The mask
        All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        ################################################# The points
        X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196),
                                            int32=False)
        #################################################

        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        rois = np.copy(rois_fg)
        for i in range(rois_fg.shape[0]):
            #
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]
            #
            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            #
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])
            GT_weights = np.ones(GT_I.shape).astype(np.float32)
            #
            ## Do the flipping of the densepose annotation !
            if (IsFlipped):
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose(
                    GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)
            #
            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]
            #
            x1 = roi_fg[0]
            x2 = roi_fg[2]
            y1 = roi_fg[1]
            y2 = roi_fg[3]
            #
            x1_source = roi_gt[0]
            x2_source = roi_gt[2]
            y1_source = roi_gt[1]
            y2_source = roi_gt[3]
            #
            x_targets = (np.arange(x1, x2, (x2 - x1) / M) -
                         x1_source) * (256. / (x2_source - x1_source))
            y_targets = (np.arange(y1, y2, (y2 - y1) / M) -
                         y1_source) * (256. / (y2_source - y1_source))
            #
            x_targets = x_targets[
                0:
                M]  ## Strangely sometimes it can be M+1, so make sure size is OK!
            y_targets = y_targets[0:M]
            #
            [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))
            #
            All_L = np.zeros(New_Index.shape)
            All_W = np.ones(New_Index.shape)
            #
            All_L = New_Index
            #
            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source
            #
            GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (M /
                                                                     (y2 - y1))
            GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (M /
                                                                     (x2 - x1))
            #
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0
            #
            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_weights = GT_weights[points_inside]
            GT_I = GT_I[points_inside]

            #
            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V
            Uv_point_weights[i, 0:len(GT_weights)] = GT_weights
            #
            All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2)
            ##
            # proposal based segmentation
            p_mask = (Ilabel > 0).astype(np.float32)
            target_roi = roi_gt * im_scale
            p_mask = cv2.resize(p_mask, (int(target_roi[2] - target_roi[0]),
                                         int(target_roi[3] - target_roi[1])))
            p_mask = (p_mask > 0.5).astype(np.float32)
            start_y, start_x = int(target_roi[1]), int(target_roi[0])
            end_y, end_x = start_y + p_mask.shape[0], start_x + p_mask.shape[1]
            # if all_person_masks[start_y:end_y, start_x:end_x].shape[0]!=p_mask.shape[0] or all_person_masks[start_y:end_y, start_x:end_x].shape[1]!=p_mask.shape[1]:
            #     print('shape exception:',all_person_masks[start_y:end_y, start_x:end_x].shape,p_mask.shape)
            #     print('roi:',target_roi)
            #     print(start_y,end_y, start_x,end_x)
            #     print('input image:',all_person_masks.shape)
            #     assert False
            all_person_masks[start_y:end_y, start_x:end_x] = p_mask
    else:
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        #
        if (len(bg_inds) == 0):
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))

        roi_has_mask[0] = 1
        #
        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        Uv_point_weights = blob_utils.zeros((1, 196), int32=False)
        #
        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)
    #
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))
    #
    K = cfg.BODY_UV_RCNN.NUM_PATCHES
    #
    u_points = np.copy(U_points)
    v_points = np.copy(V_points)
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    #
    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * I_points.shape[1]:(jjj + 1) *
                         I_points.shape[1]] = (I_points == jjj).astype(
                             np.float32)
    #

    # person masks here
    person_mask = (All_labels > 0).astype(np.int32)
    # extra
    # index_targets = np.zeros_like(person_mask).reshape((-1,M,M)).astype(np.int32)
    # index_targets_weights = np.zeros_like(index_targets)
    # u_targets = np.zeros((index_targets.shape[0],25,M,M),dtype=np.float32)
    # v_targets = np.zeros((index_targets.shape[0], 25, M, M),dtype=np.float32)
    # uv_weights = np.zeros((index_targets.shape[0], 25, M, M),dtype=np.float32)
    # for ibatch in range(index_targets.shape[0]):
    #     for i_surface in range(1,K+1):
    #         points_i = I_points[ibatch] == i_surface
    #         if len(points_i)>0:
    #             points_x = np.asarray(X_points[ibatch][points_i], dtype=np.int32).reshape((-1,1))
    #             points_y = np.asarray(Y_points[ibatch][points_i], dtype=np.int32).reshape((-1,1))
    #             points_u = u_points[ibatch][points_i].reshape((1, -1))
    #             points_v = v_points[ibatch][points_i].reshape((1, -1))
    #             locs = np.hstack([points_x, points_y])
    #
    #             for step in [1]:
    #                 x_plus_locs = np.copy(points_x) + step
    #                 y_plus_locs = np.copy(points_y) + step
    #                 x_minus_locs = np.copy(points_x) - step
    #                 y_minus_locs = np.copy(points_y) - step
    #
    #                 locs = np.vstack([locs, np.hstack([x_plus_locs, y_plus_locs])])
    #                 locs = np.vstack([locs, np.hstack([x_plus_locs, y_minus_locs])])
    #                 locs = np.vstack([locs, np.hstack([x_minus_locs, y_plus_locs])])
    #                 locs = np.vstack([locs, np.hstack([x_minus_locs, y_minus_locs])])
    #
    #             locs[locs < 0] = 0.
    #             locs[locs >= M] = M - 1
    #
    #             points_u = np.repeat(points_u, 5, axis=0).reshape((-1))
    #             points_v = np.repeat(points_v, 5, axis=0).reshape((-1))
    #
    #
    #             index_targets[ibatch][locs[:,1], locs[:, 0]] = i_surface
    #             index_targets_weights[ibatch][locs[:, 1], locs[:, 0]] = 1
    #             u_targets[ibatch, i_surface][locs[:, 1], locs[:, 0]] = points_u
    #             v_targets[ibatch, i_surface][locs[:, 1], locs[:, 0]] = points_v
    #             uv_weights[ibatch, i_surface][locs[:, 1], locs[:, 0]] = 1.
    #     if random.random() <= 0.5:
    #         _,index_targets[ibatch], v_targets[ibatch], v_targets[ibatch], index_targets_weights[ibatch], uv_weights[ibatch] = expand_dp_targets(All_labels[ibatch].reshape((M,M)),
    #                                                                                                                                              index_targets[ibatch], v_targets[ibatch],
    #                                                                                                                                              v_targets[ibatch],
    #                                                                                                                                              index_targets_weights[ibatch],
    #                                                                                                                                              uv_weights[ibatch])

    # proposal all masks here

    if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)):
        proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M),
                                             int32=True)
        for i in range(rois_fg.shape[0]):

            roi_fg = rois_fg[i][1:]

            proposal_mask = all_person_masks[int(roi_fg[1]):int(roi_fg[3]),
                                             int(roi_fg[0]):int(roi_fg[2])]
            proposal_mask = cv2.resize(proposal_mask, (M, M))
            proposal_mask = (proposal_mask > 0.5).astype(np.int32)
            proposal_all_mask[i] = proposal_mask
    else:
        proposal_all_mask = blob_utils.zeros((1, M, M), int32=True)

    ################
    # Update blobs dict with Mask R-CNN blobs
    ###############
    #
    blobs['body_mask_labels'] = person_mask.reshape((-1, M, M))
    blobs['body_uv_rois'] = np.array(rois_fg)
    blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    ##
    blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    #
    ##########################
    blobs['body_uv_X_points'] = X_points.astype(np.float32)
    blobs['body_uv_Y_points'] = Y_points.astype(np.float32)
    blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['body_uv_I_points'] = I_points.astype(np.float32)
    blobs['body_uv_U_points'] = U_points.astype(
        np.float32)  #### VERY IMPORTANT :   These are switched here :
    blobs['body_uv_V_points'] = V_points.astype(np.float32)
    blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
    ###################
    # extra
    # blobs['body_uv_Index_targets'] = index_targets
    # blobs['body_uv_Index_targets_weights'] = index_targets_weights.astype(np.float32)
    # blobs['body_uv_U_targets'] = u_targets
    # blobs['body_uv_V_targets'] = v_targets
    # blobs['body_uv_weights'] = uv_weights
    ################
    # add by wxh
    if cfg.BODY_UV_RCNN.USE_CLS_EMBS:
        fg_embs, bg_embs, fg_weights, bg_weights = masks_to_embs(
            All_labels.reshape((-1, M, M)))
        # print('fg',fg_embs.max(), fg_embs.min())
        # print('bg',bg_embs.max(), bg_embs.min())
        fg_norms = np.sum(fg_embs, axis=(1, 2))
        fg_norms[fg_norms != 0] = 56. * 56. / fg_norms[fg_norms != 0]
        bg_norms = np.sum(bg_embs, axis=(1, 2))
        bg_norms[bg_norms != 0] = 56. * 56. / bg_norms[bg_norms != 0]

        blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)),
                                     2,
                                     axis=1)
        blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)),
                                     2,
                                     axis=1)
        blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)), 2, axis=1)
        blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)), 2, axis=1)
        blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1),
                                              dtype=np.int32)
        blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1),
                                               dtype=np.int32)
        blobs['mask_emb_weights'] = np.vstack([fg_weights,
                                               bg_weights]).reshape(
                                                   (-1, 1)).astype(np.float32)
    if cfg.BODY_UV_RCNN.USE_BOX_ALL_MASKS:
        blobs['body_masks_wrt_box'] = proposal_all_mask
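The per-RoI loop above warps each ground-truth DensePose chart, which is annotated on a 256 x 256 grid inside its gt box, onto the M x M heatmap of the matched RoI, and maps the annotated point coordinates into the same heatmap grid. Below is a minimal standalone sketch of those two coordinate transforms, assuming (x1, y1, x2, y2) boxes; the function and argument names are hypothetical and this is not the DensePose code itself.

import numpy as np
import cv2

def resample_gt_chart_to_roi(gt_chart, gt_box, roi, M):
    """Sample an M x M grid inside `roi`, express it in the 256 x 256
    coordinate frame of `gt_box`, and look up `gt_chart` there with
    nearest-neighbour remapping (sketch of the cv2.remap call above)."""
    x1, y1, x2, y2 = roi
    gx1, gy1, gx2, gy2 = gt_box
    # M evenly spaced sample locations inside the roi, converted to the
    # 256 x 256 annotation frame of the gt box
    xs = (np.arange(x1, x2, (x2 - x1) / M)[:M] - gx1) * (256. / (gx2 - gx1))
    ys = (np.arange(y1, y2, (y2 - y1) / M)[:M] - gy1) * (256. / (gy2 - gy1))
    grid_x, grid_y = np.meshgrid(xs, ys)
    return cv2.remap(gt_chart, grid_x.astype(np.float32),
                     grid_y.astype(np.float32),
                     interpolation=cv2.INTER_NEAREST,
                     borderMode=cv2.BORDER_CONSTANT, borderValue=0)

def map_points_to_roi_heatmap(px, py, gt_box, roi, M):
    """Map annotated point coordinates (0..255 inside gt_box) onto the
    M x M heatmap grid of `roi` (sketch of the GT_x / GT_y rescaling above)."""
    x1, y1, x2, y2 = roi
    gx1, gy1, gx2, gy2 = gt_box
    hx = ((px / 256. * (gx2 - gx1)) + gx1 - x1) * (M / (x2 - x1))
    hy = ((py / 256. * (gy2 - gy1)) + gy1 - y1) * (M / (y2 - y1))
    return hx, hy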
Exemple #17
0
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION

    input_w = roidb['input_width']
    input_h = roidb['input_height']
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1
    mask_fg_rois_per_this_image = cfg.MRCNN.MAX_ROIS_PER_IM
    if fg_inds.shape[0] > 0:
        if fg_inds.size > mask_fg_rois_per_this_image:
            fg_inds = np.random.choice(fg_inds,
                                       size=mask_fg_rois_per_this_image,
                                       replace=False)
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        all_person_masks = np.zeros(
            (int(input_h / im_scale), int(input_w / im_scale)),
            dtype=np.float32)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
            # also rasterize the mask to a box_h x box_w binary image
            mask_wrt_bbox = segm_utils.convert_polys_to_mask_wrt_box(
                poly_gt, roi_fg)
            start_y, start_x = int(roi_fg[1]), int(roi_fg[0])
            end_y, end_x = start_y + mask_wrt_bbox.shape[
                0], start_x + mask_wrt_bbox.shape[1]
            all_person_masks[start_y:end_y, start_x:end_x] = mask_wrt_bbox
        proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M),
                                             int32=True)
        for i in range(rois_fg.shape[0]):
            roi_fg = rois_fg[i]
            w = roi_fg[2] - roi_fg[0]
            h = roi_fg[3] - roi_fg[1]
            w = int(np.maximum(w, 1))
            h = int(np.maximum(h, 1))
            proposal_mask = all_person_masks[int(roi_fg[1]):int(roi_fg[1]) + h,
                                             int(roi_fg[0]):int(roi_fg[0]) + w]
            # proposal_mask = proposal_mask.astype(np.float32)
            proposal_mask = cv2.resize(proposal_mask, (M, M))
            proposal_mask = (proposal_mask > 0.5).astype(np.int32)
            proposal_all_mask[i] = proposal_mask
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1 mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1
        proposal_all_mask = -blob_utils.ones((1, M, M), int32=True)

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
    #    blobs['mask_labels'] = np.argmax(masks.reshape((-1,cfg.MODEL.NUM_CLASSES,M,M)),axis=1).reshape((-1,M,M)).astype(np.int32)
    #    blobs['mask_weights'] = np.ones(blobs['mask_labels'].shape, dtype=np.float32)
    # add by wxh
    if cfg.MRCNN.USE_CLS_EMBS:
        fg_embs, bg_embs, fg_weights, bg_weights = masks_to_embs(
            masks.reshape((-1, cfg.MODEL.NUM_CLASSES, M, M)))
        # print('fg',fg_embs.max(), fg_embs.min())
        # print('bg',bg_embs.max(), bg_embs.min())
        fg_norms = np.sum(fg_embs, axis=(1, 2))
        fg_norms[fg_norms != 0] = 28. * 28. / (fg_norms[fg_norms != 0] + 1e-6)
        bg_norms = np.sum(bg_embs, axis=(1, 2))
        bg_norms[bg_norms != 0] = 28. * 28. / (bg_norms[bg_norms != 0] + 1e-6)

        blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)),
                                     2,
                                     axis=1)
        blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)),
                                     2,
                                     axis=1)
        blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)), 2, axis=1)
        blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)), 2, axis=1)

        blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1),
                                              dtype=np.int32)
        blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1),
                                               dtype=np.int32)


#        blobs['mask_emb_weights'] = np.vstack([fg_weights, bg_weights]).reshape((-1,1)).astype(np.float32)
    if cfg.MRCNN.BBOX_CASCADE_MASK_ON:
        blobs['inter_masks_int32'] = proposal_all_mask
    def forward(self, inputs, outputs):
        """See modeling.detector.AddBBoxAccuracy for inputs/outputs
        documentation.
        """

        # predicted bbox deltas, shape (R, C*4)
        bbox_deltas = inputs[0].data
        # coordinates of all proposals, shape (R, 5)
        bbox_data = inputs[1].data
        assert bbox_data.shape[1] == 5
        # bbox_prior: coordinates of all proposals, shape (R, 4)
        bbox_prior = bbox_data[:, 1:]

        # labels
        labels = inputs[2].data

        # mapped gt boxes
        mapped_gt_boxes = inputs[3].data
        gt_boxes = mapped_gt_boxes[:, :4]
        max_overlap = mapped_gt_boxes[:, 4]

        # bbox iou only for fg and non-gt boxes
        # labels here are the class labels of the mapped gt boxes;
        # the gt boxes themselves (max_overlap == 1.0) are removed,
        # i.e. we keep only foreground, non-gt proposals
        keep_inds = np.where((labels > 0) & (max_overlap < 1.0))[0]
        # number of proposals that pass this filter
        num_boxes = keep_inds.size
        bbox_deltas = bbox_deltas[keep_inds, :]
        bbox_prior = bbox_prior[keep_inds, :]
        labels = labels[keep_inds]
        gt_boxes = gt_boxes[keep_inds, :]
        max_overlap = max_overlap[keep_inds]

        # With CLS_AGNOSTIC_BBOX_REG a single, class-agnostic set of deltas
        # (the last 4 columns) is shared by all classes
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_boxes == 0:
            bbox_deltas = bbox_deltas[:, -4:]
        else:
            # Keep, for each box, only the 4 deltas that belong to its gt
            # class label; the resulting bbox_deltas has shape (num_boxes, 4)
            bbox_deltas = np.vstack([
                bbox_deltas[i, labels[i] * 4:labels[i] * 4 + 4]
                for i in range(num_boxes)
            ])

        # Apply the deltas to the proposals with bbox_transform (note the
        # _bbox_reg_weights) to obtain the predicted boxes
        pred_boxes = box_utils.bbox_transform(bbox_prior, bbox_deltas,
                                              self._bbox_reg_weights)

        # accumulate per-box IoU; averaged below
        avg_iou = 0.
        pre_avg_iou = sum(max_overlap)
        for i in range(num_boxes):
            # coordinates of the i-th gt box (matched to the i-th predicted box)
            gt_box = gt_boxes[i, :]
            # coordinates of the i-th predicted box
            pred_box = pred_boxes[i, :]
            # IoU between gt_box and pred_box
            tmp_iou = box_utils.bbox_overlaps(
                gt_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
                pred_box[np.newaxis, :].astype(dtype=np.float32, copy=False),
            )
            avg_iou += tmp_iou[0]
        if num_boxes > 0:
            avg_iou /= num_boxes
            pre_avg_iou /= num_boxes
        # outputs[0] -> average IoU of this stage
        # outputs[1] -> average IoU of the previous stage
        outputs[0].reshape([1])
        outputs[0].data[...] = avg_iou
        outputs[1].reshape([1])
        outputs[1].data[...] = pre_avg_iou
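The accuracy op above, like most of these examples, relies on box_utils.bbox_overlaps, which in Detectron is implemented in Cython. A pure-NumPy sketch of the same pairwise IoU computation, assuming (x1, y1, x2, y2) boxes and the "+ 1" pixel convention used throughout this code:

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between an (N, 4) and a (K, 4) set of boxes, returned
    as an (N, K) matrix. NumPy sketch of what box_utils.bbox_overlaps
    computes; not the Detectron implementation itself."""
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    # box areas, using the "+ 1" pixel convention seen throughout this code
    area_n = ((boxes[:, 2] - boxes[:, 0] + 1) *
              (boxes[:, 3] - boxes[:, 1] + 1))
    area_k = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
              (query_boxes[:, 3] - query_boxes[:, 1] + 1))
    # pairwise intersection widths and heights
    iw = (np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) -
          np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1)
    ih = (np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) -
          np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1)
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    union = area_n[:, None] + area_k[None, :] - inter
    return inter / union

Since the loop above pairs the i-th predicted box with the i-th ground-truth box, its avg_iou is simply the mean of the diagonal of this matrix.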
Exemple #19
0
def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        M = len(inds_4d)
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
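The labelling rule above combines two foreground criteria, the anchors that best cover each gt box (including ties) and a positive IoU threshold, with a separate threshold for background. A standalone sketch of that rule, taking a precomputed (num_anchors, num_gt) IoU matrix; the function and threshold arguments are hypothetical:

import numpy as np

def label_anchors(overlaps, gt_classes, pos_thresh, neg_thresh):
    """Assign a label to each anchor from an (A, G) IoU matrix:
    1..C = foreground class, 0 = background, -1 = ignore.
    Sketch of the assignment logic in _get_retinanet_blobs above."""
    num_anchors = overlaps.shape[0]
    labels = np.full((num_anchors,), -1, dtype=np.float32)
    # best gt for every anchor, and the amount of that overlap
    anchor_to_gt_argmax = overlaps.argmax(axis=1)
    anchor_to_gt_max = overlaps[np.arange(num_anchors), anchor_to_gt_argmax]
    # for each gt, the anchors tied for its best overlap become positives
    gt_to_anchor_max = overlaps.max(axis=0)
    ties = np.where(overlaps == gt_to_anchor_max)[0]
    labels[ties] = gt_classes[anchor_to_gt_argmax[ties]]
    # anchors above the positive IoU threshold are positives as well
    pos = anchor_to_gt_max >= pos_thresh
    labels[pos] = gt_classes[anchor_to_gt_argmax[pos]]
    # anchors below the negative threshold become background (applied after
    # the positive rules, as in the code above)
    labels[anchor_to_gt_max < neg_thresh] = 0
    return labels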
Exemple #20
0
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh)
            & (all_anchors[:, 1] >= -straddle_thresh)
            & (all_anchors[:, 2] < im_width + straddle_thresh)
            & (all_anchors[:, 3] < im_height + straddle_thresh))[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets,
                                    total_anchors,
                                    inds_inside,
                                    fill=0)
    bbox_inside_weights = data_utils.unmap(bbox_inside_weights,
                                           total_anchors,
                                           inds_inside,
                                           fill=0)
    bbox_outside_weights = data_utils.unmap(bbox_outside_weights,
                                            total_anchors,
                                            inds_inside,
                                            fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(rpn_labels_int32_wide=_labels,
                 rpn_bbox_targets_wide=_bbox_targets,
                 rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                 rpn_bbox_outside_weights_wide=_bbox_outside_weights))
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
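The subsampling above caps the number of positive anchors at RPN_FG_FRACTION * RPN_BATCH_SIZE_PER_IM and fills the remainder of the batch with negatives. A compact sketch of that step, with the batch size and fg fraction passed in rather than read from the config; the function name is hypothetical:

import numpy as np

def subsample_rpn_labels(labels, bg_candidates, batch_size, fg_fraction,
                         rng=np.random):
    """labels: 1 = positive, -1 = unassigned/ignore; bg_candidates: boolean
    mask of anchors eligible to become negatives (IoU below the negative
    threshold). Sketch of the sampling used in _get_rpn_blobs above."""
    labels = labels.copy()
    num_fg = int(fg_fraction * batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # randomly disable the excess positives
        disable_inds = rng.choice(fg_inds, size=len(fg_inds) - num_fg,
                                  replace=False)
        labels[disable_inds] = -1
    # fill the rest of the batch with negatives
    num_bg = batch_size - np.sum(labels == 1)
    bg_inds = np.where(bg_candidates)[0]
    if len(bg_inds) > num_bg:
        # sampled with replacement, as above; repeats are rare because the
        # pool of candidate negatives is usually large
        enable_inds = bg_inds[rng.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    return labels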
Exemple #21
0
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1 mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
Exemple #22
0
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    # gao 6,29
    gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    boxes_from_polys = roidb['boxes'][gt_inds, :]
    gt_classes = roidb['gt_classes'][gt_inds]

    im_label = cv2.imread(roidb['ins_seg'], 0)
    if roidb['flipped'] == 1:  # convert flipped label to original
        im_label = im_label[:, ::-1]
        dataset_name = cfg.TRAIN.DATASETS[0]
        if 'LIP' in dataset_name:
            flipped_2_orig_class = {
                14: 15,
                15: 14,
                16: 17,
                17: 16,
                18: 19,
                19: 18
            }
        if 'ATR' in dataset_name:
            flipped_2_orig_class = {
                9: 10,
                10: 9,
                12: 13,
                13: 12,
                14: 15,
                15: 14
            }
        gt_classes_ = copy.deepcopy(gt_classes)
        for i in flipped_2_orig_class.keys():
            index_i = np.where(gt_classes_ == i)[0]
            if len(index_i) == 0:
                continue
            gt_classes[index_i] = flipped_2_orig_class[i]


#        gt_inds_flip = np.where(gt_classes>13)[0]
#        for i in gt_inds_flip:
#            gt_classes[i] = flipped_2_orig_class[gt_classes[i]]

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            # poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            #logger.info('roi_fg, label shape: {},{}'.format(roi_fg,im_label.shape))
            x0, y0, x1, y1 = roi_fg
            x0 = min(int(x0), im_label.shape[1])
            x1 = min(int(x1 + 1), im_label.shape[1])
            y0 = min(int(y0), im_label.shape[0])
            y1 = min(int(y1 + 1), im_label.shape[0])
            #logger.info('x0,y0,x1,y1: {}'.format(x0, y0, x1, y1))
            mask_ = im_label[y0:y1, x0:x1]
            #logger.info('mask_ shape: {}, gt_classes[fg_polys_ind]:{}'.format(mask_.shape, boxes_from_polys[fg_polys_ind]))
            #            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask_ == gt_classes[fg_polys_ind],
                            dtype=np.int32)  # Ensure it's binary

            mask = cv2.resize(mask, (M, M), interpolation=cv2.INTER_NEAREST)
            masks[i, :] = np.reshape(mask, M**2)
        im_label = None
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1 mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1 blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
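Unlike the polygon-based variants above, this version reads instance labels from a per-pixel label image (roidb['ins_seg']) and binarizes the RoI crop against the matched gt class. A minimal sketch of that crop-and-binarize step, with hypothetical argument names:

import numpy as np
import cv2

def mask_target_from_label_map(im_label, roi, class_id, M):
    """Crop a per-pixel label image to `roi`, keep the pixels equal to
    `class_id`, and resize (nearest neighbour, so labels are not blended)
    to an M x M binary target. Sketch of the loop body above."""
    h, w = im_label.shape[:2]
    x0, y0, x1, y1 = roi
    x0 = min(int(x0), w)
    x1 = min(int(x1 + 1), w)
    y0 = min(int(y0), h)
    y1 = min(int(y1 + 1), h)
    crop = im_label[y0:y1, x0:x1]
    mask = (crop == class_id).astype(np.uint8)
    mask = cv2.resize(mask, (M, M), interpolation=cv2.INTER_NEAREST)
    return mask.astype(np.int32).reshape(M * M)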
Exemple #24
0
def evaluate_box_proposals(json_dataset,
                           roidb,
                           thresholds=None,
                           area='all',
                           limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]
    ]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0])
                                 & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos
    }
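The loop above greedily pairs each gt box with the proposal that covers it best, removes both from further matching, and records the coverage IoU per gt box. A condensed sketch of that matching for a single image's IoU matrix; the helper name is hypothetical:

import numpy as np

def greedy_gt_coverage(overlaps):
    """overlaps: (num_proposals, num_gt) IoU matrix. Returns, for each gt
    box, the IoU of the proposal greedily assigned to it, matching the
    best-covered gt box first. Mirrors the loop above."""
    overlaps = overlaps.copy()
    num_props, num_gt = overlaps.shape
    covered = np.zeros(num_gt)
    for j in range(min(num_props, num_gt)):
        max_overlaps = overlaps.max(axis=0)       # best coverage per gt box
        gt_ind = max_overlaps.argmax()            # best-covered gt box
        box_ind = overlaps[:, gt_ind].argmax()    # proposal that covers it
        covered[j] = overlaps[box_ind, gt_ind]
        overlaps[box_ind, :] = -1                 # mark both as used
        overlaps[:, gt_ind] = -1
    return covered

Concatenating these per-image coverage vectors and evaluating (coverages >= t).mean() at thresholds from 0.5 to 0.95 reproduces the recall curve and the averaged recall ('ar') returned above.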
Exemple #25
0
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros(
            (num_boxes, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
            )
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(
            entry['boxes'],
            boxes.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype)
        )
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype)
        )
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype)
        )
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(
                entry['box_to_gt_ind_map'].dtype, copy=False
            )
        )
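The key bookkeeping above records, for each proposal box, its maximum IoU in the column of the class of the gt box it overlaps most, plus the index of that gt box. A small standalone sketch of that step; the function name is hypothetical:

import numpy as np

def record_gt_overlaps(overlaps, gt_classes, num_classes):
    """overlaps: (num_boxes, num_gt) IoU matrix; gt_classes: (num_gt,)
    class index per gt box. Returns a (num_boxes, num_classes) matrix where
    each row holds the box's max IoU under the class of its best gt, and a
    vector mapping each box to that gt (-1 if it touches no gt). Sketch of
    the logic in _merge_proposal_boxes_into_roidb above."""
    num_boxes = overlaps.shape[0]
    gt_overlaps = np.zeros((num_boxes, num_classes), dtype=np.float32)
    box_to_gt = -np.ones((num_boxes,), dtype=np.int32)
    argmaxes = overlaps.argmax(axis=1)     # gt box with the most overlap
    maxes = overlaps.max(axis=1)           # amount of that overlap
    keep = np.where(maxes > 0)[0]          # boxes with non-zero overlap
    gt_overlaps[keep, gt_classes[argmaxes[keep]]] = maxes[keep]
    box_to_gt[keep] = argmaxes[keep]
    return gt_overlaps, box_to_gt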
def evaluate_box_proposals(
    json_dataset, roidb, thresholds=None, area='all', limit=None
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7}
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps, 'num_pos': num_pos}
Exemple #27
0
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add DensePose specific blobs to the given inputs blobs dictionary."""
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE
    # Prepare the body UV targets by associating one gt box which contains
    # body UV annotations to each training roi that has a fg class label.
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = roidb['boxes'][polys_gt_inds]
    # Select foreground RoIs
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_body_uv = np.zeros_like(blobs['labels_int32'], dtype=np.int32)

    if ((boxes_from_polys.shape[0] > 0) & (fg_inds.shape[0] > 0)):
        # Find overlap between all foreground RoIs and the gt bounding boxes
        # containing each body UV annotation.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Select foreground RoIs as those with > 0.7 overlap
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if ((boxes_from_polys.shape[0] > 0) & (fg_inds.shape[0] > 0)):
        roi_has_body_uv[fg_inds] = 1
        # Create body UV blobs
        # Dense masks, each mask for a given fg roi is of size M x M.
        part_inds = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True)
        # Weights assigned to each target in `part_inds`. By default, all 1's.
        # part_inds_weights = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True)
        part_inds_weights = blob_utils.ones((fg_inds.shape[0], M, M),
                                            int32=False)
        # 2D spatial coordinates (on the image). Shape is (#fg_rois, 2) in format
        # (x, y).
        coords_xy = blob_utils.zeros((fg_inds.shape[0], 196, 2), int32=False)
        # 24 patch indices plus a background class
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        # UV coordinates in each patch
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        # Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)

        rois_fg = sampled_boxes[fg_inds]
        # Keep only the rows of the IoU matrix for the RoIs that survived the
        # 0.7 filter (fg_inds indexes sampled_boxes, not these rows)
        overlaps_bbfg_bbpolys = overlaps_bbfg_bbpolys[fg_polys_value > 0.7]
        # Map from each fg roi to the index of the gt box with highest overlap
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add body UV targets for each fg roi
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            polys_gt_ind = polys_gt_inds[fg_polys_ind]
            # RLE encoded dense masks which are of size 256 x 256.
            # Map all part masks to 14 labels (i.e., indices of semantic body parts).
            dp_masks = dp_utils.GetDensePoseMask(
                roidb['dp_masks'][polys_gt_ind],
                cfg.BODY_UV_RCNN.NUM_SEMANTIC_PARTS)
            # Surface patch indices of collected points
            dp_I = np.array(roidb['dp_I'][polys_gt_ind], dtype=np.int32)
            # UV coordinates of collected points
            dp_U = np.array(roidb['dp_U'][polys_gt_ind], dtype=np.float32)
            dp_V = np.array(roidb['dp_V'][polys_gt_ind], dtype=np.float32)
            # dp_UV_weights = np.ones_like(dp_I).astype(np.float32)
            # Spatial coordinates on the image which are scaled such that the bbox
            # size is 256 x 256.
            dp_x = np.array(roidb['dp_x'][polys_gt_ind], dtype=np.float32)
            dp_y = np.array(roidb['dp_y'][polys_gt_ind], dtype=np.float32)
            # Do the flipping of the densepose annotation
            if roidb['flipped']:
                dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks = DP.get_symmetric_densepose(
                    dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks)

            roi_fg = rois_fg[i]
            gt_box = boxes_from_polys[fg_polys_ind]
            fg_x1, fg_y1, fg_x2, fg_y2 = roi_fg[0:4]
            gt_x1, gt_y1, gt_x2, gt_y2 = gt_box[0:4]
            fg_width = fg_x2 - fg_x1
            fg_height = fg_y2 - fg_y1
            gt_width = gt_x2 - gt_x1
            gt_height = gt_y2 - gt_y1
            fg_scale_w = float(M) / fg_width
            fg_scale_h = float(M) / fg_height
            gt_scale_w = 256. / gt_width
            gt_scale_h = 256. / gt_height
            # Sample M points evenly within the fg roi and scale the relative coordinates
            # (to associated gt box) such that the bounding box size is 256 x 256.
            x_targets = (np.arange(fg_x1, fg_x2, fg_width / M) -
                         gt_x1) * gt_scale_w
            y_targets = (np.arange(fg_y1, fg_y2, fg_height / M) -
                         gt_y1) * gt_scale_h
            # Construct 2D coordinate matrices
            x_targets, y_targets = np.meshgrid(x_targets[:M], y_targets[:M])
            ## Another implementation option (which results in similar performance)
            # x_targets = (np.linspace(fg_x1, fg_x2, M, endpoint=True, dtype=np.float32) - gt_x1) * gt_scale_w
            # y_targets = (np.linspace(fg_y1, fg_y2, M, endpoint=True, dtype=np.float32) - gt_y1) * gt_scale_h
            # x_targets = (np.linspace(fg_x1, fg_x2, M, endpoint=False) - gt_x1) * gt_scale_w
            # y_targets = (np.linspace(fg_y1, fg_y2, M, endpoint=False) - gt_y1) * gt_scale_h
            # x_targets, y_targets = np.meshgrid(x_targets, y_targets)

            # Map dense masks of size 256 x 256 to target heatmap of size M x M.
            part_inds[i] = cv2.remap(dp_masks,
                                     x_targets.astype(np.float32),
                                     y_targets.astype(np.float32),
                                     interpolation=cv2.INTER_NEAREST,
                                     borderMode=cv2.BORDER_CONSTANT,
                                     borderValue=(0))
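            # cv2.remap samples the 256 x 256 part-label mask at the grid
            # coordinates computed above, producing the M x M part-index target
            # for this roi (nearest-neighbor interpolation keeps labels discrete).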

            # Scale annotated spatial coordinates from bbox of size 256 x 256 to target
            # heatmap of size M x M.
            dp_x = (dp_x / gt_scale_w + gt_x1 - fg_x1) * fg_scale_w
            dp_y = (dp_y / gt_scale_h + gt_y1 - fg_y1) * fg_scale_h
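            # This is the inverse of the sampling above: point coordinates go
            # from the 256 x 256 gt-box frame back to image pixels, then into
            # the roi's M x M heatmap frame.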
            # Set the patch index of points falling outside the heatmap to 0 (background).
            dp_I[dp_x < 0] = 0
            dp_I[dp_x > (M - 1)] = 0
            dp_I[dp_y < 0] = 0
            dp_I[dp_y > (M - 1)] = 0
            # Get body UV annotations of points inside the heatmap.
            points_inside = dp_I > 0
            dp_x = dp_x[points_inside]
            dp_y = dp_y[points_inside]
            dp_I = dp_I[points_inside]
            dp_U = dp_U[points_inside]
            dp_V = dp_V[points_inside]
            # dp_UV_weights = dp_UV_weights[points_inside]

            # Update body UV blobs
            num_dp_points = len(dp_I)
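            # The point blobs are fixed-size (196 slots per roi); only the
            # first num_dp_points entries are filled, the rest stay zero.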
            # coords_xy[i, 0:num_dp_points, 0] = i  # fg_roi index
            coords_xy[i, 0:num_dp_points, 0] = dp_x
            coords_xy[i, 0:num_dp_points, 1] = dp_y
            I_points[i, 0:num_dp_points] = dp_I.astype(np.int32)
            U_points[i, 0:num_dp_points] = dp_U
            V_points[i, 0:num_dp_points] = dp_V
            # Uv_point_weights[i, 0:len(dp_UV_weights)] = dp_UV_weights
    else:  # If there are no fg rois
        # The network cannot handle empty blobs, so we must provide one.
        # We simply take the first bg roi, give it all-zero body UV annotations,
        # and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # `rois_fg` is actually one background roi, but that's ok because ...
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # Mark that the first roi has body UV annotation
        roi_has_body_uv[0] = 1
        # Give it all-zero blobs
        part_inds = blob_utils.zeros((1, M, M), int32=True)
        part_inds_weights = blob_utils.zeros((1, M, M), int32=False)
        coords_xy = blob_utils.zeros((1, 196, 2), int32=False)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        # Uv_point_weights = blob_utils.zeros((1, 196), int32=False)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))
    # Create body UV blobs for all patches (including background)
    K = cfg.BODY_UV_RCNN.NUM_PATCHES + 1
    # Construct the U/V_points blobs for all patches by repeating them K times
    # (once per patch channel). Shape: (#rois, 196, K)
    U_points = np.repeat(U_points[:, :, np.newaxis], K, axis=-1)
    V_points = np.repeat(V_points[:, :, np.newaxis], K, axis=-1)
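    # Each point's U/V target is replicated across all K patch channels; the
    # binary weights below select, for each point, only the channel of its
    # annotated patch.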
    uv_point_weights = np.zeros_like(U_points)
    # Set binary weights for UV targets in each patch
    for i in np.arange(1, K):
        uv_point_weights[:, :, i] = (I_points == i).astype(np.float32)
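    # uv_point_weights[r, p, k] is 1 only when point p of roi r is annotated
    # with patch index k, so each point has at most one active channel
    # (points with I == 0, i.e. background/padding, have none).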

    # Update blobs dict with body UV blobs
    blobs['body_uv_rois'] = rois_fg
    blobs['roi_has_body_uv_int32'] = roi_has_body_uv  # shape: (#rois,)
    blobs['body_uv_parts'] = part_inds  # shape: (#rois, M, M)
    blobs['body_uv_parts_weights'] = part_inds_weights  # shape: (#rois, M, M)
    blobs['body_uv_coords_xy'] = coords_xy.reshape(-1, 2)  # shape: (#rois * 196, 2)
    blobs['body_uv_I_points'] = I_points.reshape(-1, 1)  # shape: (#rois * 196, 1)
    blobs['body_uv_U_points'] = U_points  # shape: (#rois, 196, K)
    blobs['body_uv_V_points'] = V_points  # shape: (#rois, 196, K)
    blobs['body_uv_point_weights'] = uv_point_weights  # shape: (#rois, 196, K)