def _compute_targets(entry): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs rois = entry['boxes'] overlaps = entry['max_overlaps'] labels = entry['max_classes'] gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] # Targets has format (class, tx, ty, tw, th) targets = np.zeros((rois.shape[0], 5), dtype=np.float32) if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return targets # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = box_utils.bbox_overlaps( rois[ex_inds, :].astype(dtype=np.float32, copy=False), rois[gt_inds, :].astype(dtype=np.float32, copy=False)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] # Use class "1" for all boxes if using class_agnostic_bbox_reg targets[ex_inds, 0] = (1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds]) targets[ex_inds, 1:] = box_utils.bbox_transform_inv(ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS) return targets
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add Mask R-CNN specific blobs to the input blob dictionary.""" # Prepare the mask targets by associating one gt mask to each training roi # that has a fg (non-bg) class label. M = cfg.MRCNN.RESOLUTION polys_gt_inds = np.where((roidb['actor_gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0] polys_gt = [roidb['segms'][i] for i in polys_gt_inds] boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds] fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = blobs['labels_int32'].copy() roi_has_mask[roi_has_mask > 0] = 1 if fg_inds.shape[0] > 0: # Class labels for the foreground rois mask_class_labels = blobs['labels_int32'][fg_inds] masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) # Find overlap between all foreground rois and the bounding boxes # enclosing each segmentation rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) # Map from each fg rois to the index of the mask with highest overlap # (measured by bbox overlap) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # add fg targets for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] poly_gt = polys_gt[fg_polys_ind] roi_fg = rois_fg[i] # Rasterize the portion of the polygon mask within the given fg roi # to an M x M binary image mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary masks[i, :] = np.reshape(mask, M**2) else: # If there are no fg masks (it does happen) # The network cannot handle empty blobs, so we must provide a mask # We simply take the first bg roi, given it an all -1's mask (ignore # label), and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # rois_fg is actually one background roi, but that's ok because ... rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # We give it an -1's blob (ignore label) masks = -blob_utils.ones((1, M**2), int32=True) # We label it with class = 0 (background) mask_class_labels = blob_utils.zeros((1, )) # Mark that the first roi has a mask roi_has_mask[0] = 1 if cfg.MRCNN.CLS_SPECIFIC_MASK: masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Update blobs dict with Mask R-CNN blobs blobs['mask_rois'] = rois_fg blobs['roi_has_mask_int32'] = roi_has_mask blobs['masks_int32'] = masks
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes): total_anchors = all_anchors.shape[0] straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH if straddle_thresh >= 0: # Only keep anchors inside the image by a margin of straddle_thresh # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all # anchors inds_inside = np.where( (all_anchors[:, 0] >= -straddle_thresh) & (all_anchors[:, 1] >= -straddle_thresh) & (all_anchors[:, 2] < im_width + straddle_thresh) & (all_anchors[:, 3] < im_height + straddle_thresh))[0] # keep only inside anchors anchors = all_anchors[inds_inside, :] else: inds_inside = np.arange(all_anchors.shape[0]) anchors = all_anchors num_inside = len(inds_inside) logger.debug('total_anchors: %d', total_anchors) logger.debug('inds_inside: %d', num_inside) logger.debug('anchors.shape: %s', str(anchors.shape)) # Compute anchor labels: # label=1 is positive, 0 is negative, -1 is don't care (ignore) labels = np.empty((num_inside, ), dtype=np.int32) labels.fill(-1) if len(gt_boxes) > 0: # Compute overlaps between the anchors and the gt boxes overlaps anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes) # Map from anchor to gt box that has highest overlap anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) # For each anchor, amount of overlap with most overlapping gt box anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside), anchor_to_gt_argmax] # Map from gt box to an anchor that has highest overlap gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) # For each gt box, amount of overlap with most overlapping anchor gt_to_anchor_max = anchor_by_gt_overlap[ gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])] # Find all anchors that share the max overlap amount # (this includes many ties) anchors_with_max_overlap = np.where( anchor_by_gt_overlap == gt_to_anchor_max)[0] # Fg label: for each gt use anchors with highest overlap # (including ties) labels[anchors_with_max_overlap] = 1 # Fg label: above threshold IOU labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 fg_inds = np.where(labels == 1)[0] # subsample negative labels if we have too many # (samples with replacement, but since the set of bg inds is large most # samples will not have repeats) num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1) bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0] if len(bg_inds) > num_bg: enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)] labels[enable_inds] = 0 bg_inds = np.where(labels == 0)[0] bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets[fg_inds, :] = data_utils.compute_targets( anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]) # Bbox regression loss has the form: # loss(x) = weight_outside * L(weight_inside * x) # Inside weights allow us to set zero loss on an element-wise basis # Bbox regression is only trained on positive examples so we set their # weights to 1.0 (or otherwise if config is different) and 0 otherwise bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0) # The bbox regression loss only averages by the number of images in the # mini-batch, whereas we need to average by the total number of example # anchors selected # Outside weights are used to scale each element-wise loss so the final # average over the mini-batch is correct bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) bbox_outside_weights[labels == 1, :] = 1.0 / num_examples bbox_outside_weights[labels == 0, :] = 1.0 / num_examples # Map up to original set of anchors labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = data_utils.unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = data_utils.unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) # Split the generated labels, etc. into labels per each field of anchors blobs_out = [] start_idx = 0 for foa in foas: H = foa.field_size W = foa.field_size A = foa.num_cell_anchors end_idx = start_idx + H * W * A _labels = labels[start_idx:end_idx] _bbox_targets = bbox_targets[start_idx:end_idx, :] _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :] _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :] start_idx = end_idx # labels output with shape (1, A, height, width) _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2) # bbox_targets output with shape (1, 4 * A, height, width) _bbox_targets = _bbox_targets.reshape( (1, H, W, A * 4)).transpose(0, 3, 1, 2) # bbox_inside_weights output with shape (1, 4 * A, height, width) _bbox_inside_weights = _bbox_inside_weights.reshape( (1, H, W, A * 4)).transpose(0, 3, 1, 2) # bbox_outside_weights output with shape (1, 4 * A, height, width) _bbox_outside_weights = _bbox_outside_weights.reshape( (1, H, W, A * 4)).transpose(0, 3, 1, 2) blobs_out.append( dict(rpn_labels_int32_wide=_labels, rpn_bbox_targets_wide=_bbox_targets, rpn_bbox_inside_weights_wide=_bbox_inside_weights, rpn_bbox_outside_weights_wide=_bbox_outside_weights)) return blobs_out[0] if len(blobs_out) == 1 else blobs_out
def evaluate_box_proposals( json_dataset, roidb, thresholds=None, area='all', limit=None ): """Evaluate detection proposal recall metrics. This function is a much faster alternative to the official COCO API recall evaluation code. However, it produces slightly different results. """ # Record max overlap value for each gt box # Return vector of overlap values areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2]] # 512-inf assert area in areas, 'Unknown area range: {}'.format(area) area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) num_pos = 0 for entry in roidb: gt_inds = np.where( (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] gt_boxes = entry['boxes'][gt_inds, :] gt_areas = entry['seg_areas'][gt_inds] valid_gt_inds = np.where( (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] gt_boxes = gt_boxes[valid_gt_inds, :] num_pos += len(valid_gt_inds) non_gt_inds = np.where(entry['gt_classes'] == 0)[0] boxes = entry['boxes'][non_gt_inds, :] if boxes.shape[0] == 0: continue if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False)) _gt_overlaps = np.zeros((gt_boxes.shape[0])) for j in range(min(boxes.shape[0], gt_boxes.shape[0])): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) gt_overlaps = np.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = np.arange(0.5, 0.95 + 1e-5, step) recalls = np.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps, 'num_pos': num_pos}
def _merge_proposal_boxes_into_roidb(roidb, box_list): """Add proposal boxes to each roidb entry.""" assert len(box_list) == len(roidb) for i, entry in enumerate(roidb): boxes = box_list[i] num_boxes = boxes.shape[0] actor_gt_overlaps = np.zeros( (num_boxes, entry['actor_gt_overlaps'].shape[1]), dtype=entry['actor_gt_overlaps'].dtype ) action_gt_overlaps = np.zeros( (num_boxes, entry['action_gt_overlaps'].shape[1]), dtype=entry['action_gt_overlaps'].dtype ) box_to_gt_ind_map = -np.ones( (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype ) # Note: unlike in other places, here we intentionally include all gt # rois, even ones marked as crowd. Boxes that overlap with crowds will # be filtered out later (see: _filter_crowd_proposals). # assume that actor-action must be foreground at the same time gt_inds = np.where(entry['actor_gt_classes'] > 0)[0] if len(gt_inds) > 0: gt_boxes = entry['boxes'][gt_inds, :] actor_gt_classes = entry['actor_gt_classes'][gt_inds] action_gt_classes = entry['action_gt_classes'][gt_inds] proposal_to_gt_overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False) ) # Gt box that overlaps each input box the most # (ties are broken arbitrarily by class order) # for each proposal, which index of class id has the highest overlap # (if all overlaps are zeros, first index of class id will be returned) argmaxes = proposal_to_gt_overlaps.argmax(axis=1) # Amount of that overlap maxes = proposal_to_gt_overlaps.max(axis=1) # Those boxes with non-zero overlap with gt boxes # will filter out proposals that has no overlap with any ground truth box I = np.where(maxes > 0)[0] # Record max overlaps with the class of the appropriate gt box actor_gt_overlaps[I, actor_gt_classes[argmaxes[I]]] = maxes[I] action_gt_overlaps[I, action_gt_classes[argmaxes[I]]] = maxes[I] box_to_gt_ind_map[I] = gt_inds[argmaxes[I]] entry['boxes'] = np.append( entry['boxes'], boxes.astype(entry['boxes'].dtype, copy=False), axis=0 ) entry['actor_gt_classes'] = np.append( entry['actor_gt_classes'], np.zeros((num_boxes), dtype=entry['actor_gt_classes'].dtype) ) entry['action_gt_classes'] = np.append( entry['action_gt_classes'], np.zeros((num_boxes), dtype=entry['action_gt_classes'].dtype) ) entry['seg_areas'] = np.append( entry['seg_areas'], np.zeros((num_boxes), dtype=entry['seg_areas'].dtype) ) entry['actor_gt_overlaps'] = np.append( entry['actor_gt_overlaps'].toarray(), actor_gt_overlaps, axis=0 ) entry['actor_gt_overlaps'] = scipy.sparse.csr_matrix(entry['actor_gt_overlaps']) entry['action_gt_overlaps'] = np.append( entry['action_gt_overlaps'].toarray(), action_gt_overlaps, axis=0 ) entry['action_gt_overlaps'] = scipy.sparse.csr_matrix(entry['action_gt_overlaps']) entry['is_crowd'] = np.append( entry['is_crowd'], np.zeros((num_boxes), dtype=entry['is_crowd'].dtype) ) entry['box_to_gt_ind_map'] = np.append( entry['box_to_gt_ind_map'], box_to_gt_ind_map.astype( entry['box_to_gt_ind_map'].dtype, copy=False ) )