def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Append each image's proposal boxes to its roidb entry, in place.

    Proposals are added as non-gt rows: class 0, zero segmentation area and
    not crowd. For every proposal that overlaps some gt box, the overlap with
    its best-matching gt box is stored in the column of that box's class and
    the index of that gt box is stored in 'box_to_gt_ind_map' (-1 when the
    proposal overlaps nothing).

    Args:
        roidb: sequence of roidb entry dicts; every entry is modified in
            place.
        box_list: per-image proposal arrays (one box per row), aligned with
            `roidb`.
    """
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        proposals = box_list[i]
        num_props = proposals.shape[0]

        # Per-proposal rows that get appended to the entry further down.
        prop_overlaps = np.zeros(
            (num_props, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype)
        prop_to_gt = -np.ones(
            (num_props), dtype=entry['box_to_gt_ind_map'].dtype)

        # Crowd gt boxes are deliberately kept here; proposals overlapping
        # crowds are filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            iou = box_utils.bbox_overlaps(
                proposals.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Best gt box per proposal (ties resolved by argmax order) and
            # the overlap with it.
            best_gt = iou.argmax(axis=1)
            best_iou = iou.max(axis=1)
            # Only proposals that touch at least one gt box are recorded.
            hit = np.where(best_iou > 0)[0]
            prop_overlaps[hit, gt_classes[best_gt[hit]]] = best_iou[hit]
            prop_to_gt[hit] = gt_inds[best_gt[hit]]

        entry['boxes'] = np.append(
            entry['boxes'],
            proposals.astype(entry['boxes'].dtype, copy=False),
            axis=0)
        # Proposals carry no class and no segmentation area.
        for key in ('gt_classes', 'seg_areas'):
            entry[key] = np.append(
                entry[key], np.zeros((num_props), dtype=entry[key].dtype))
        # The overlap matrix is stored sparse: densify, extend, re-sparsify.
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), prop_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_props), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            prop_to_gt.astype(entry['box_to_gt_ind_map'].dtype, copy=False))
def compute_bbox_regression_targets(entry, stage_num):
    """Compute bounding-box regression targets for an image.

    Args:
        entry: roidb entry; reads 'boxes', 'max_overlaps', 'max_classes',
            'gt_classes' and 'is_crowd'.
        stage_num: selects the regression weights. 0 uses
            cfg.MODEL.BBOX_REG_WEIGHTS; 1, 2 and 3 use the matching
            cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE{1,2,3}.

    Returns:
        float32 array of shape (num_rois, 5) with rows in the format
        (class, tx, ty, tw, th). Rows of ROIs whose max overlap is below
        cfg.TRAIN.BBOX_THRESH stay all-zero, and the whole array is zero when
        the image has no non-crowd ground-truth ROI.

    Raises:
        ValueError: if `stage_num` is not 0, 1, 2 or 3 (only checked when the
            image has ground-truth ROIs).
    """
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    # Indices of ground-truth ROIs (crowd regions are excluded)
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # One exclusive chain with an explicit failure: previously an unexpected
    # stage left bbox_reg_weights unbound and crashed further down with an
    # UnboundLocalError that did not mention the stage at all.
    if stage_num == 0:
        bbox_reg_weights = cfg.MODEL.BBOX_REG_WEIGHTS
    elif stage_num == 1:
        bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE1
    elif stage_num == 2:
        bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE2
    elif stage_num == 3:
        bbox_reg_weights = cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE3
    else:
        raise ValueError(
            'Unsupported stage_num: {!r} (expected 0, 1, 2 or 3)'.format(
                stage_num))

    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (
        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, bbox_reg_weights)
    return targets
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Args:
        blobs: blob dict; 'labels_int32_3rd' is read, and 'mask_rois',
            'roi_has_mask_int32' and 'masks_int32' are written.
        sampled_boxes: sampled roi boxes, one row per entry of
            blobs['labels_int32_3rd']. This array is not modified.
        roidb: roidb entry; reads 'gt_classes', 'is_crowd' and 'segms'.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written into the first column of 'mask_rois'.

    Returns:
        None; `blobs` is updated in place.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    roi_labels = blobs['labels_int32_3rd']
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(roi_labels > 0)[0]
    roi_has_mask = roi_labels.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = roi_labels[fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation. Indexing with an index array yields a
        # fresh array, so the scaling below cannot touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i, (roi_fg, fg_polys_ind) in enumerate(
                zip(rois_fg, fg_polys_inds)):
            poly_gt = polys_gt[fg_polys_ind]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(roi_labels == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        # Copy it: integer indexing plus reshape returns a view, and the
        # in-place scaling below would otherwise rescale the caller's
        # sampled_boxes row as a hidden side effect.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        # NOTE(review): index 0 is flagged rather than bg_inds[0]; presumably
        # the two coincide because no roi is foreground here -- confirm.
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(
            masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width,
                         im_height):
    """Build RetinaNet classification labels and box targets for one image.

    Args:
        foas: fields of anchors; `field_size` and `stride` are read from each.
        all_anchors: anchor boxes, one per row, laid out field after field in
            the order of `foas`.
        gt_boxes: ground-truth boxes, one per row.
        gt_classes: class label of each ground-truth box.
        im_width: image width; bounds the columns of each label map.
        im_height: image height; bounds the rows of each label map.

    Returns:
        Tuple (blobs_out, out_num_fg, out_num_bg). `blobs_out` holds one dict
        per field with keys 'retnet_cls_labels', 'retnet_roi_bbox_targets'
        and 'retnet_roi_fg_bbox_locs'; the other two are one-element arrays
        derived from the foreground / background anchor counts.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # Every anchor is used; there is no inside-the-image filtering here.
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label >= 1 is the gt class, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    # NOTE(review): with an empty gt_boxes the overlap arrays above are never
    # assigned and the statements below raise; callers presumably only pass
    # images that have ground truth -- confirm.
    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside,
                                    fill=0)

    # Loop-invariant configuration, read once.
    num_classes = cfg.MODEL.NUM_CLASSES - 1
    class_specific_bbox = cfg.RETINANET.CLASS_SPECIFIC_BBOX

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, 1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        inds_4d = np.where(_labels > 0)
        im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
        # Count the foreground cells themselves. len(inds_4d) is the number
        # of axes of _labels (always 4), so a guard on it never skipped
        # anything; sizing the outputs by the real count gives the same
        # (0, 4) arrays when there is no foreground.
        num_fg_locs = len(im_inds)
        _roi_bbox_targets = np.zeros((num_fg_locs, 4))
        _roi_fg_bbox_locs = np.zeros((num_fg_locs, 4))
        lbls = _labels[im_inds, :, y, x]
        for i, lbl in enumerate(lbls):
            # Zero-based class index, or 0 when boxes are class-agnostic
            cls_idx = lbl[0] - 1
            if not class_specific_bbox:
                cls_idx = 0
            assert cls_idx >= 0 and cls_idx < num_classes, \
                'label out of the range'
            _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
            _roi_fg_bbox_locs[i, :] = np.array([[0, cls_idx, y[i], x[i]]])

        blobs_out.append(
            dict(
                # Only the part of the field covered by the image is kept
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Build RPN training labels and box-regression blobs for one image.

    Args:
        im_height: image height, used by the straddle filter.
        im_width: image width, used by the straddle filter.
        foas: fields of anchors; `field_size` and `num_cell_anchors` are read
            from each.
        all_anchors: anchor boxes, one per row, laid out field after field in
            the order of `foas`.
        gt_boxes: ground-truth boxes, one per row.

    Returns:
        One dict per field of anchors with keys 'rpn_labels_int32_wide',
        'rpn_bbox_targets_wide', 'rpn_bbox_inside_weights_wide' and
        'rpn_bbox_outside_weights_wide'. A list of these dicts is returned,
        or the dict itself when there is exactly one field.
    """

    def _as_blob(rows, height, width, channels):
        """Reshape per-anchor rows to a (1, channels, height, width) blob."""
        return rows.reshape((1, height, width, channels)).transpose(0, 3, 1, 2)

    total_anchors = all_anchors.shape[0]
    margin = cfg.TRAIN.RPN_STRADDLE_THRESH

    if margin >= 0:
        # Keep only anchors lying inside the image, allowing `margin` pixels
        # of overhang. A TRAIN.RPN_STRADDLE_THRESH of -1 (or a large value)
        # keeps all anchors.
        inside = (
            (all_anchors[:, 0] >= -margin) &
            (all_anchors[:, 1] >= -margin) &
            (all_anchors[:, 2] < im_width + margin) &
            (all_anchors[:, 3] < im_height + margin))
        inds_inside = np.where(inside)[0]
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Anchor labels: 1 = positive, 0 = negative, -1 = ignore.
    labels = np.full((num_inside, ), -1, dtype=np.int32)
    if len(gt_boxes) > 0:
        iou = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Best gt box for every anchor, and the overlap with it.
        best_gt_per_anchor = iou.argmax(axis=1)
        best_iou_per_anchor = iou[np.arange(num_inside), best_gt_per_anchor]
        # Best overlap achieved for every gt box.
        best_anchor_per_gt = iou.argmax(axis=0)
        best_iou_per_gt = iou[best_anchor_per_gt, np.arange(iou.shape[1])]
        # Every anchor that reaches a gt box's best overlap (ties included)
        # is positive...
        tied_best_anchors = np.where(iou == best_iou_per_gt)[0]
        labels[tied_best_anchors] = 1
        # ...and so is every anchor above the IoU threshold.
        labels[best_iou_per_anchor >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # Subsample positives down to the configured fraction of the batch.
    max_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    surplus_fg = len(fg_inds) - max_fg
    if surplus_fg > 0:
        dropped = npr.choice(fg_inds, size=surplus_fg, replace=False)
        labels[dropped] = -1
    fg_inds = np.where(labels == 1)[0]

    # Fill the rest of the batch with negatives. Sampling is done with
    # replacement; the candidate pool is large, so repeats are rare.
    # NOTE(review): when the pool is not larger than the quota, no anchor is
    # labelled 0 at all -- confirm this is intended.
    max_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_pool = np.where(
        best_iou_per_anchor < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_pool) > max_bg:
        chosen = bg_pool[npr.randint(len(bg_pool), size=max_bg)]
        labels[chosen] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[best_gt_per_anchor[fg_inds], :])

    # The regression loss has the form
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights switch the loss on per element: only positive anchors
    # are regressed, so they get 1.0 and everything else stays 0.
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # Outside weights rescale each element so that the loss, which is only
    # averaged over images, ends up averaged over the sampled anchors.
    # Every sampled anchor (label 0 or 1) gets the same weight.
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels >= 0, :] = 1.0 / num_examples

    # Scatter everything back onto the full anchor set.
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # Cut the flat arrays into one group of blobs per field of anchors.
    blobs_out = []
    offset = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        chunk = slice(offset, offset + H * W * A)
        offset = chunk.stop
        blobs_out.append(
            dict(
                # labels: (1, A, height, width)
                rpn_labels_int32_wide=_as_blob(labels[chunk], H, W, A),
                # the three box blobs: (1, 4 * A, height, width)
                rpn_bbox_targets_wide=_as_blob(
                    bbox_targets[chunk, :], H, W, A * 4),
                rpn_bbox_inside_weights_wide=_as_blob(
                    bbox_inside_weights[chunk, :], H, W, A * 4),
                rpn_bbox_outside_weights_wide=_as_blob(
                    bbox_outside_weights[chunk, :], H, W, A * 4)))

    if len(blobs_out) == 1:
        return blobs_out[0]
    return blobs_out
def evaluate_box_proposals(json_dataset, roidb, thresholds=None, area='all',
                           limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API recall
    evaluation code. However, it produces slightly different results.

    Args:
        json_dataset: not used by this function; kept for the call signature.
        roidb: iterable of roidb entries. Rows with gt_classes > 0 that are
            not crowd are the ground truth; rows with gt_classes == 0 are the
            proposals being scored.
        thresholds: IoU thresholds at which recall is computed. Defaults to
            0.5, 0.55, ..., 0.95.
        area: name of the ground-truth area range to evaluate ('all',
            'small', 'medium', 'large', '96-128', '128-256', '256-512' or
            '512-inf').
        limit: if given, only the first `limit` proposals of each image are
            used.

    Returns:
        Dict with 'ar' (mean of the recalls), 'recalls' (one per threshold),
        'thresholds', 'gt_overlaps' (sorted best IoU obtained for each
        ground-truth box of images that have proposals) and 'num_pos' (number
        of ground-truth boxes in the area range). When there is no
        ground-truth box at all, the recalls and 'ar' are 0.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]  # 512-inf
    ]
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]

    # Per-image overlap vectors, stacked once after the loop instead of
    # re-allocating a growing array for every image. The empty seed keeps the
    # result a float array even when no image contributes.
    per_image_overlaps = [np.zeros(0)]
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            # No proposals: the gt boxes still count in num_pos (as misses)
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))

        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        per_image_overlaps.append(_gt_overlaps)

    gt_overlaps = np.sort(np.hstack(per_image_overlaps))
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold. Without any ground-truth box the
    # ratio would be 0/0 (NaN plus a RuntimeWarning), so the recalls are left
    # at 0 in that case.
    if num_pos > 0:
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos
    }