Beispiel #1
0
def _expand_bbox_targets(bbox_target_data):
    """Expand compact bounding-box regression targets to the 4-of-4*K layout.

    Each row of ``bbox_target_data`` holds a class label in column 0 followed
    by the regression targets. For every row whose label is positive, the
    targets are copied into the four columns belonging to that class and the
    matching loss weights are set to one; all other entries stay zero.

    Returns:
        bbox_targets (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        # Class-agnostic regression only distinguishes bg and fg.
        num_reg_classes = 2
    else:
        num_reg_classes = cfg.MODEL.NUM_CLASSES

    labels = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((labels.size, 4 * num_reg_classes))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)

    for row in np.nonzero(labels > 0)[0]:
        first_col = 4 * int(labels[row])
        cols = slice(first_col, first_col + 4)
        bbox_targets[row, cols] = bbox_target_data[row, 1:]
        bbox_inside_weights[row, cols] = (1.0, 1.0, 1.0, 1.0)

    return bbox_targets, bbox_inside_weights
Beispiel #2
0
def keypoints_to_heatmap_labels(keypoints, rois):
    """Encode keypoint locations as linear heatmap indices for
    SoftmaxWithLoss.

    Returns:
        heatmaps: len(rois) x NUM_KEYPOINTS blob holding, for every valid
            keypoint, the index ``y * HEATMAP_SIZE + x`` of its heatmap cell
            (zero for invalid keypoints).
        weights: blob of the same shape, 1 where the keypoint is visible and
            falls inside the heatmap, 0 otherwise.
    """
    # Keypoints are mapped from the half-open interval [x1, x2) on continuous
    # image coordinates to the closed interval [0, HEATMAP_SIZE - 1] on
    # discrete coordinates, following Heckbert 1990 ("What is the coordinate
    # of a pixel?"): d = floor(c) and c = d + 0.5, with d discrete and c
    # continuous.
    num_keypoints = cfg.KRCNN.NUM_KEYPOINTS
    heatmap_size = cfg.KRCNN.HEATMAP_SIZE
    assert keypoints.shape[2] == num_keypoints

    blob_shape = (len(rois), num_keypoints)
    heatmaps = blob_utils.zeros(blob_shape)
    weights = blob_utils.zeros(blob_shape)

    roi_x1 = rois[:, 0]
    roi_y1 = rois[:, 1]
    roi_x2 = rois[:, 2]
    roi_y2 = rois[:, 3]
    scale_x = heatmap_size / (roi_x2 - roi_x1)
    scale_y = heatmap_size / (roi_y2 - roi_y1)

    for kp in range(keypoints.shape[2]):
        is_visible = keypoints[:, 2, kp] > 0
        kp_x = keypoints[:, 0, kp].astype(np.float32)
        kp_y = keypoints[:, 1, kp].astype(np.float32)

        # floor() would push a keypoint lying exactly on the roi's right or
        # bottom edge out of the heatmap, so such keypoints are pinned to the
        # last cell instead.
        on_right_edge = kp_x == roi_x2
        on_bottom_edge = kp_y == roi_y2

        cell_x = np.floor((kp_x - roi_x1) * scale_x)
        cell_x[on_right_edge] = heatmap_size - 1
        cell_y = np.floor((kp_y - roi_y1) * scale_y)
        cell_y[on_bottom_edge] = heatmap_size - 1

        inside = ((cell_x >= 0) & (cell_y >= 0) &
                  (cell_x < heatmap_size) & (cell_y < heatmap_size))
        valid = (inside & is_visible).astype(np.int32)

        heatmaps[:, kp] = (cell_y * heatmap_size + cell_x) * valid
        weights[:, kp] = valid

    return heatmaps, weights
Beispiel #3
0
def add_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add DensePose (UV) R-CNN specific blobs to the input blob dictionary.

    Foreground rois (``blobs['labels_int32'] > 0``) are kept only if their
    best overlap with a ground-truth box that has ``ignore_UV_body == 0``
    exceeds 0.7. Each kept roi is paired with its best-overlapping gt entry;
    that entry's part mask is resampled onto an M x M grid aligned with the
    roi, and its annotated points are re-expressed in roi heatmap coordinates.
    If no roi qualifies, a single dummy roi with all-zero targets is emitted.

    Args:
        blobs: blob dictionary; reads 'labels_int32' and is updated in place
            with the 'uv_*' and 'roi_has_uv_int32' entries.
        sampled_boxes: boxes of the sampled rois, indexed like
            ``blobs['labels_int32']``. Not modified.
        roidb: roidb entry; reads 'flipped', 'ignore_UV_body', 'boxes',
            'dp_masks', 'dp_I', 'dp_U', 'dp_V', 'dp_x' and 'dp_y'.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written in the first column of 'uv_rois'.
    """
    M = cfg.UVRCNN.HEATMAP_SIZE
    # Fixed per-roi capacity of every point blob: annotations fill the leading
    # slots and the rest stays zero.
    num_point_slots = 196
    is_flipped = roidb['flipped']

    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = np.array(roidb['boxes'][polys_gt_inds])
    has_gt_boxes = bool(boxes_from_polys.any())

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if has_gt_boxes and fg_inds.shape[0] > 0:
        # Keep only fg rois that overlap some annotated gt box well enough.
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            sampled_boxes[fg_inds].astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if has_gt_boxes and fg_inds.shape[0] > 0:
        roi_has_mask[fg_inds] = 1
        num_fg = fg_inds.shape[0]

        # Dense part-label targets and their weights.
        All_labels = blob_utils.zeros((num_fg, M**2), int32=True)
        All_Weights = blob_utils.zeros((num_fg, M**2), int32=True)
        # Sparse point targets.
        X_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)
        Y_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)
        Ind_points = blob_utils.zeros((num_fg, num_point_slots), int32=True)
        I_points = blob_utils.zeros((num_fg, num_point_slots), int32=True)
        U_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)
        V_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)

        # Fancy indexing yields a fresh array, so the in-place scaling below
        # does not touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]

            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])

            # Flipped images need the annotation mirrored as well.
            if is_flipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose(
                    GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]
            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]
            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source

            # Sample M positions across the roi and express them on the
            # 256-unit grid spanning the gt box, which is the coordinate
            # frame passed to cv2.remap for Ilabel below.
            x_targets = (np.arange(x1, x2, (x2 - x1) / M) -
                         x1_source) * (256. / gt_length_x)
            y_targets = (np.arange(y1, y2, (y2 - y1) / M) -
                         y1_source) * (256. / gt_length_y)
            # Floating-point steps can make arange return M + 1 samples.
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            X_targets, Y_targets = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))
            All_W = np.ones(New_Index.shape)

            # Move annotated points from the 256-unit gt-box grid into roi
            # heatmap coordinates.
            GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (M /
                                                                     (y2 - y1))
            GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (M /
                                                                     (x2 - x1))

            # Points that land outside the heatmap are dropped by clearing
            # their part index first.
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0

            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V

            All_labels[i, :] = np.reshape(New_Index.astype(np.int32), M**2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2)
    else:
        # No usable fg roi: emit one dummy roi (the first bg roi when there is
        # one) with all-zero targets.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_ind = bg_inds[0] if len(bg_inds) > 0 else 0
        # Copy: reshape returns a view, and the in-place scaling below would
        # otherwise rescale the caller's sampled_boxes row.
        rois_fg = sampled_boxes[dummy_ind].reshape((1, -1)).copy()

        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, num_point_slots), int32=False)
        Y_points = blob_utils.zeros((1, num_point_slots), int32=False)
        Ind_points = blob_utils.zeros((1, num_point_slots), int32=True)
        I_points = blob_utils.zeros((1, num_point_slots), int32=True)
        U_points = blob_utils.zeros((1, num_point_slots), int32=False)
        V_points = blob_utils.zeros((1, num_point_slots), int32=False)

        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)

    # Scale the rois and prepend the batch index column.
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    K = cfg.UVRCNN.NUM_PATCHES
    # U/V targets are repeated once per patch slot (K + 1 slots); the weights
    # switch on, for each point, only the slot matching its part index.
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    slot_width = I_points.shape[1]
    for patch in range(1, K + 1):
        Uv_Weight_Points[:, patch * slot_width:(patch + 1) * slot_width] = (
            I_points == patch).astype(np.float32)

    # Update blobs dict with the UV R-CNN blobs.
    blobs['uv_rois'] = np.array(rois_fg)
    blobs['roi_has_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['uv_X_points'] = X_points.astype(np.float32)
    blobs['uv_Y_points'] = Y_points.astype(np.float32)
    blobs['uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['uv_I_points'] = I_points.astype(np.float32)
    # NOTE(review): the original author flagged U/V as "switched here", but no
    # swap is visible in this function -- confirm against the consumer.
    blobs['uv_U_points'] = U_points.astype(np.float32)
    blobs['uv_V_points'] = V_points.astype(np.float32)
    blobs['uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
Beispiel #4
0
def add_parsing_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add parsing R-CNN specific blobs to the input blob dictionary.

    Every foreground roi (optionally subsampled to cfg.PRCNN.ROI_BATCH_SIZE)
    is associated with the gt parsing whose enclosing box overlaps it most,
    and that parsing is rendered relative to the roi at M x M resolution.
    If there is no foreground roi, one dummy roi with zero targets and zero
    weights is emitted.

    Args:
        blobs: blob dictionary; reads 'labels_int32' and is updated in place
            with 'parsing_rois', 'parsing_weights' and 'parsing_int32'.
        sampled_boxes: boxes of the sampled rois, indexed like
            ``blobs['labels_int32']``. Not modified.
        roidb: roidb entry; reads 'gt_classes', 'is_crowd', 'parsing' and
            'flipped'.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written in the first column of 'parsing_rois'.
    """
    M = cfg.PRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]

    parsing_gt = [roidb['parsing'][i] for i in polys_gt_inds]
    boxes_from_png = parsing_utils.parsing_to_boxes(parsing_gt,
                                                    roidb['flipped'])

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]

    if fg_inds.shape[0] > 0:
        if cfg.PRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.PRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)
        parsings = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Overlap between every foreground roi and the boxes enclosing each
        # gt parsing. Fancy indexing yields a fresh array, so the in-place
        # scaling below does not touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_png.astype(np.float32, copy=False))
        # Index of the best-overlapping gt parsing for each fg roi.
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets.
        for i in range(rois_fg.shape[0]):
            parsing_gt_fg = parsing_gt[fg_polys_inds[i]]
            parsings[i, :] = parsing_utils.parsing_wrt_box(
                parsing_gt_fg, rois_fg[i], M, roidb['flipped'])
        weights = blob_utils.ones((rois_fg.shape[0], M**2))
    else:  # If there are no fg rois (it does happen)
        # The network cannot handle empty blobs, so provide one dummy roi:
        # the first bg roi when there is one, otherwise the first sampled roi.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_ind = bg_inds[0] if len(bg_inds) > 0 else 0
        # Copy: reshape returns a view, and the in-place scaling below would
        # otherwise rescale the caller's sampled_boxes row.
        rois_fg = sampled_boxes[dummy_ind].reshape((1, -1)).copy()
        # All-zero targets with all-zero weights.
        parsings = blob_utils.zeros((1, M**2), int32=True)
        weights = blob_utils.zeros((1, M**2))

    parsings = np.reshape(parsings, (-1, 1))
    weights = np.reshape(weights, (-1, 1))

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with parsing R-CNN blobs
    blobs['parsing_rois'] = rois_fg
    blobs['parsing_weights'] = weights
    blobs['parsing_int32'] = parsings
Beispiel #5
0
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Every foreground roi (optionally subsampled to cfg.MRCNN.ROI_BATCH_SIZE)
    is associated with the gt segmentation whose enclosing box overlaps it
    most, and that segmentation is rasterized relative to the roi as an
    M x M binary mask. If there is no foreground roi, one dummy roi with an
    all -1 (ignore) mask is emitted.

    Args:
        blobs: blob dictionary; reads 'labels_int32' and is updated in place
            with 'mask_rois', 'roi_has_mask_int32' and 'masks_int32'.
        sampled_boxes: boxes of the sampled rois, indexed like
            ``blobs['labels_int32']``. Not modified.
        roidb: roidb entry; reads 'gt_classes', 'is_crowd' and 'segms'.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written in the first column of 'mask_rois'.
    """
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    # NOTE(review): roi_has_mask flags every fg roi, including those dropped
    # by the ROI_BATCH_SIZE subsampling below -- confirm that consumers of
    # 'roi_has_mask_int32' expect this.
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        if cfg.MRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.MRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Overlap between every foreground roi and the boxes enclosing each
        # segmentation. Fancy indexing yields a fresh array, so the in-place
        # scaling below does not touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Index of the best-overlapping gt mask for each fg roi.
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets.
        for i in range(rois_fg.shape[0]):
            poly_gt = polys_gt[fg_polys_inds[i]]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image.
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, rois_fg[i], M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so provide one dummy roi:
        # the first bg roi when there is one, otherwise the first sampled roi
        # (same fallback as the UV and parsing heads).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_ind = bg_inds[0] if len(bg_inds) > 0 else 0
        # Copy: reshape returns a view, and the in-place scaling below would
        # otherwise rescale the caller's sampled_boxes row.
        rois_fg = sampled_boxes[dummy_ind].reshape((1, -1)).copy()
        # Give it an all -1 mask (ignore label)...
        masks = -blob_utils.ones((1, M**2), int32=True)
        # ...label it with class 0 (background)...
        mask_class_labels = blob_utils.zeros((1, ))
        # ...and mark that the first roi has a mask.
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
Beispiel #6
0
def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds, im_scale,
                            batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.

    Rois qualify as keypoint foreground when their max overlap reaches
    cfg.TRAIN.FG_THRESH and at least one visible keypoint of the matched gt
    lies inside the roi. At most ``fg_rois_per_image`` of them (further capped
    by cfg.KRCNN.ROI_BATCH_SIZE when positive) are sampled and encoded as
    heatmap targets. If none qualifies, one dummy roi with zero targets and
    zero weights is emitted.

    Args:
        blobs: blob dictionary; reads 'labels_int32' and is updated in place
            with 'keypoint_rois', 'keypoint_locations_int32' and
            'keypoint_weights'.
        roidb: roidb entry; reads 'gt_classes', 'max_overlaps',
            'gt_keypoints', 'box_to_gt_ind_map' and 'boxes'. Not modified.
        fg_rois_per_image: upper bound on the number of keypoint fg rois.
        fg_inds: unused by this function.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written in the first column of 'keypoint_rois'.
    """
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0]

    if kp_fg_inds.size > 0:
        kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image,
                                               kp_fg_inds.size)
        if cfg.KRCNN.ROI_BATCH_SIZE > 0:
            kp_fg_rois_per_this_image = np.minimum(kp_fg_rois_per_this_image,
                                                   cfg.KRCNN.ROI_BATCH_SIZE)
        if kp_fg_inds.size > kp_fg_rois_per_this_image:
            kp_fg_inds = np.random.choice(kp_fg_inds,
                                          size=kp_fg_rois_per_this_image,
                                          replace=False)

        # Fancy indexing yields a fresh array, so the in-place scaling below
        # does not touch roidb['boxes'].
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(sampled_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        num_targets = sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS
        if cfg.KRCNN.GAUSS_HEATMAP:
            heats, weights = keypoint_utils.keypoints_to_gauss_heatmap_labels(
                sampled_keypoints, sampled_fg_rois)
            heats = heats.reshape((num_targets, cfg.KRCNN.HEATMAP_SIZE,
                                   cfg.KRCNN.HEATMAP_SIZE))
            weights = weights.reshape(num_targets)
        else:
            heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
                sampled_keypoints, sampled_fg_rois)
            heats = heats.reshape((num_targets, 1))
            weights = weights.reshape((num_targets, 1))
    else:  # If there are no keypoint fg rois (it does happen)
        # The network cannot handle empty blobs, so provide one dummy roi with
        # zero targets and zero weights.
        # NOTE(review): kp_bg_inds indexes blobs['labels_int32'] but is used
        # below to pick a row of roidb['boxes'] -- confirm both share the same
        # index space; the roi carries zero weight either way.
        kp_bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_ind = kp_bg_inds[0] if len(kp_bg_inds) > 0 else 0
        # Copy: reshape returns a view, and the in-place scaling below would
        # otherwise rescale the box stored in the roidb entry itself.
        sampled_fg_rois = roidb['boxes'][dummy_ind].reshape((1, -1)).copy()

        num_targets = sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS
        if cfg.KRCNN.GAUSS_HEATMAP:
            heats = blob_utils.zeros((num_targets, cfg.KRCNN.HEATMAP_SIZE,
                                      cfg.KRCNN.HEATMAP_SIZE))
            weights = blob_utils.zeros(num_targets)
        else:
            heats = blob_utils.zeros((num_targets, 1))
            weights = blob_utils.zeros((num_targets, 1))

    # Scale the rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    # Gaussian heatmaps are real-valued; the index encoding is integral. The
    # blob name is the same in both modes.
    if cfg.KRCNN.GAUSS_HEATMAP:
        blobs['keypoint_locations_int32'] = heats.astype(np.float32,
                                                         copy=False)
    else:
        blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights
Beispiel #7
0
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    For every roidb entry this appends an 'im_info' row (scaled height, scaled
    width, scale) and the RPN targets returned by ``_get_rpn_blobs``; with
    multilevel FPN the target keys get a per-level ``'_fpn<lvl>'`` suffix.
    All non-empty list-valued blobs are then concatenated, and a reduced copy
    of the roidb (whitelisted keys only) is stored under 'roidb'.

    Args:
        blobs: blob dictionary whose target entries are lists; updated in
            place.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries of the minibatch.

    Returns:
        Always True, since RPN minibatches are valid by design.
    """
    multilevel_rpn = cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN
    if multilevel_rpn:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = min(2.**lvl, cfg.FPN.BACKBONE_STRIDE)
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foas.append(
                data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                anchor_aspect_ratios))
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        # Only non-crowd gt boxes serve as RPN targets.
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if multilevel_rpn:
            # One dict of blobs per level, stored under level-suffixed keys.
            rpn_blobs = _get_rpn_blobs(im_height, im_width, foas, all_anchors,
                                       gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(im_height, im_width, [foa], all_anchors,
                                       gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    # Only values of existing keys are replaced, so iterating the dict while
    # assigning is safe.
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    if cfg.MODEL.UV_ON:
        valid_keys.extend([
            'flipped', 'ignore_UV_body', 'dp_x', 'dp_y', 'dp_I', 'dp_U',
            'dp_V', 'dp_masks', 'has_uv'
        ])
    if cfg.MODEL.PARSING_ON:
        valid_keys.extend(['parsing', 'has_parsing'])
        if 'flipped' not in valid_keys:
            valid_keys.append('flipped')

    # Keep only the whitelisted keys of each entry; stored as plain dicts.
    blobs['roidb'] = [{k: e[k] for k in valid_keys if k in e} for e in roidb]

    # Always return valid=True, since RPN minibatches are valid by design
    return True