# Imports for the functions below; the package paths follow the Detectron-
# style layout this module appears to use and are an assumption here.
import numpy as np
import numpy.random as npr

from core.config import cfg
import roi_data.keypoint_rcnn
import roi_data.mask_rcnn
import utils.blob as blob_utils
import utils.boxes as box_utils
import utils.keypoints as keypoint_utils
import utils.segms as segm_utils


def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds,
                            im_scale, batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(kp_fg_inds,
                                      size=kp_fg_rois_per_this_image,
                                      replace=False)

    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    num_keypoints = gt_keypoints.shape[2]
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype)
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            assert np.sum(sampled_keypoints[ii, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois)

    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, )
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights
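
# Minimal illustrative sketch (toy data, not called by the module): how the
# keypoint fg-RoI filter above behaves. Keypoints are stored per instance as
# a (3, #keypoints) array with rows (x, y, visibility); an RoI survives only
# if at least one visible keypoint falls inside it. The inlined bounds check
# mirrors what the `_within_box` helper is assumed to compute.
def _example_keypoint_visibility():
    # One instance, two keypoints: (5, 5) visible, (20, 20) not visible
    kps = np.array([[[5.0, 20.0],    # x row
                     [5.0, 20.0],    # y row
                     [1.0, 0.0]]])   # visibility row
    boxes = np.array([[0.0, 0.0, 10.0, 10.0]])  # one RoI: x1, y1, x2, y2
    x, y = kps[:, 0, :], kps[:, 1, :]
    within_box = ((x >= boxes[:, 0, None]) & (x <= boxes[:, 2, None]) &
                  (y >= boxes[:, 1, None]) & (y <= boxes[:, 3, None]))
    vis_kp = kps[:, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    assert is_visible[0]  # the visible keypoint (5, 5) lies inside the RoI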

def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
    to encode class specific mask targets.
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.MRCNN.RESOLUTION

    # Target values of -1 are "don't care" / ignore labels
    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_ACTOR_CLASSES * M**2), int32=True)

    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start = M**2 * cls
        end = start + M**2
        # Ignore background instances
        # (only happens when there are no fg samples in an image)
        if cls > 0:
            mask_targets[i, start:end] = masks[i, :]

    return mask_targets
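
# Minimal illustrative sketch (toy data, not called by the module): the
# class-specific expansion above with 3 classes and M = 2. A mask for class 1
# lands in columns [M**2 * 1, M**2 * 2) of the (#masks, 3 * M**2) target;
# all other columns keep the -1 ignore label.
def _example_expand_mask_targets():
    M, num_classes = 2, 3
    masks = np.array([[1, 0, 0, 1]], dtype=np.int32)  # one flattened M x M mask
    labels = np.array([1])                            # its class label
    targets = -np.ones((1, num_classes * M**2), dtype=np.int32)
    start = M**2 * int(labels[0])
    targets[0, start:start + M**2] = masks[0]
    assert targets[0, 4:8].tolist() == [1, 0, 0, 1]
    assert (targets[0, :4] == -1).all() and (targets[0, 8:] == -1).all()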

def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['actor_gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds]
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # Give it an all -1's mask (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # Label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
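
# Minimal illustrative sketch (toy data, not called by the module): how each
# fg RoI above gets matched to the gt polygon whose enclosing box overlaps it
# most. The inlined IoU (with the +1 box-size convention) mirrors what
# box_utils.bbox_overlaps is assumed to compute.
def _example_fg_to_polygon_matching():
    rois_fg = np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32)
    boxes_from_polys = np.array([[20.0, 20.0, 30.0, 30.0],
                                 [1.0, 1.0, 9.0, 9.0]], dtype=np.float32)

    def iou(a, b):
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(ix2 - ix1 + 1, 0.0) * max(iy2 - iy1 + 1, 0.0)
        area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
        area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
        return inter / (area_a + area_b - inter)

    overlaps = np.array([[iou(r, b) for b in boxes_from_polys]
                         for r in rois_fg])
    fg_polys_inds = np.argmax(overlaps, axis=1)
    assert fg_polys_inds[0] == 1  # only the second gt box overlaps the RoI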

def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['actor_max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    actor_sampled_labels = roidb['actor_max_classes'][keep_inds]
    action_sampled_labels = roidb['action_max_classes'][keep_inds]
    # Label bg RoIs with class 0
    actor_sampled_labels[fg_rois_per_this_image:] = 0
    action_sampled_labels[fg_rois_per_this_image:] = 0
    sampled_boxes = roidb['boxes'][keep_inds]

    # metric_learning_boxes_inds = np.where(max_overlaps[keep_inds] >= 0.8)[0]
    # metric_learning_box_to_ind_map = \
    #     roidb['box_to_gt_ind_map'][keep_inds][metric_learning_boxes_inds]
    kept_overlaps = max_overlaps[keep_inds]
    kept_box_to_gt_ind_map = roidb['box_to_gt_ind_map'][keep_inds]

    if 'actor_bbox_targets' not in roidb:
        gt_inds = np.where(roidb['actor_gt_classes'] > 0)[0]
        gt_boxes = roidb['boxes'][gt_inds, :]
        gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]]
        actor_bbox_targets = _compute_targets(sampled_boxes,
                                              gt_boxes[gt_assignments, :],
                                              actor_sampled_labels)
        actor_bbox_targets, actor_bbox_inside_weights = _expand_bbox_targets(
            actor_bbox_targets, cfg.MODEL.NUM_ACTOR_CLASSES)
    else:
        actor_bbox_targets, actor_bbox_inside_weights = _expand_bbox_targets(
            roidb['actor_bbox_targets'][keep_inds, :],
            cfg.MODEL.NUM_ACTOR_CLASSES)

    if 'action_bbox_targets' not in roidb:
        gt_inds = np.where(roidb['action_gt_classes'] > 0)[0]
        gt_boxes = roidb['boxes'][gt_inds, :]
        gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]]
        action_bbox_targets = _compute_targets(sampled_boxes,
                                               gt_boxes[gt_assignments, :],
                                               action_sampled_labels)
        action_bbox_targets, action_bbox_inside_weights = \
            _expand_bbox_targets(action_bbox_targets,
                                 cfg.MODEL.NUM_ACTION_CLASSES)
    else:
        action_bbox_targets, action_bbox_inside_weights = \
            _expand_bbox_targets(roidb['action_bbox_targets'][keep_inds, :],
                                 cfg.MODEL.NUM_ACTION_CLASSES)

    actor_bbox_outside_weights = np.array(
        actor_bbox_inside_weights > 0, dtype=actor_bbox_inside_weights.dtype)
    action_bbox_outside_weights = np.array(
        action_bbox_inside_weights > 0,
        dtype=action_bbox_inside_weights.dtype)

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(
        labels_int32=actor_sampled_labels.astype(np.int32, copy=False),
        action_labels_int32=action_sampled_labels.astype(np.int32,
                                                         copy=False),
        rois=sampled_rois,
        actor_bbox_targets=actor_bbox_targets,
        actor_bbox_inside_weights=actor_bbox_inside_weights,
        actor_bbox_outside_weights=actor_bbox_outside_weights,
        action_bbox_targets=action_bbox_targets,
        action_bbox_inside_weights=action_bbox_inside_weights,
        action_bbox_outside_weights=action_bbox_outside_weights)
    blob_dict['kept_overlaps'] = kept_overlaps
    blob_dict['kept_box_to_gt_ind_map'] = kept_box_to_gt_ind_map

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON:
        roi_data.mask_rcnn.add_mask_rcnn_blobs(blob_dict, sampled_boxes,
                                               roidb, im_scale, batch_idx)

    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON:
        roi_data.keypoint_rcnn.add_keypoint_rcnn_blobs(
            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx)

    return blob_dict
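
# Minimal illustrative sketch (toy data, not called by the module): the fg/bg
# split that _sample_rois performs. The thresholds used here (FG_THRESH = 0.5,
# BG_THRESH_HI = 0.5, BG_THRESH_LO = 0.0) are common Detectron defaults and
# stand in for whatever this repo's cfg actually specifies.
def _example_fg_bg_split():
    max_overlaps = np.array([0.9, 0.6, 0.3, 0.1, 0.05])
    fg_thresh, bg_hi, bg_lo = 0.5, 0.5, 0.0
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    bg_inds = np.where((max_overlaps < bg_hi) & (max_overlaps >= bg_lo))[0]
    assert fg_inds.tolist() == [0, 1]     # fg: overlap >= FG_THRESH
    assert bg_inds.tolist() == [2, 3, 4]  # bg: overlap in [BG_LO, BG_HI)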