def rel_assignments_gt_boxes(roidb, im_inds):
    """Build foreground/background relationship labels over ground-truth boxes.

    Args:
        roidb: sequence of per-image entries; each is read for 'boxes',
            'sbj_gt_boxes', 'obj_gt_boxes' and 'prd_gt_classes'.
        im_inds: 1-D array holding the image index of every box in the batch
            (presumably grouped by image, since enumerate_by_image is used to
            find each image's first row -- confirm against that helper).

    Returns:
        Tuple ``(rel_inds, rel_labels)``. ``rel_labels`` has the columns
        (image id, subject index, object index, predicate class); sampled
        background rows carry class 0. ``rel_inds`` is the same array without
        the class column.
    """
    num_img = len(roidb)
    num_boxes = im_inds.shape[0]

    # Background candidates: ordered pairs of distinct boxes in the same image.
    is_cand = (im_inds[:, None] == im_inds[None])
    is_cand[np.arange(num_boxes), np.arange(num_boxes)] = False

    fg_rels = []
    for i in range(num_img):
        gt_boxes_i = roidb[i]['boxes']
        sbj_gt_boxes_i = roidb[i]['sbj_gt_boxes']
        obj_gt_boxes_i = roidb[i]['obj_gt_boxes']
        prd_gt_classes_i = roidb[i]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            # Shift out of place: an in-place `+= 1` would modify the array
            # stored in roidb, so the labels would grow on every call.
            prd_gt_classes_i = prd_gt_classes_i + 1
        # Map each relationship's subject/object box to its best-matching box.
        sbj_gt_inds_i = box_utils.bbox_overlaps(
            sbj_gt_boxes_i, gt_boxes_i).argmax(-1)
        obj_gt_inds_i = box_utils.bbox_overlaps(
            obj_gt_boxes_i, gt_boxes_i).argmax(-1)
        im_id_i = np.ones_like(sbj_gt_inds_i) * i
        gt_rels_i = np.stack(
            (im_id_i, sbj_gt_inds_i, obj_gt_inds_i, prd_gt_classes_i), -1)
        fg_rels.append(gt_rels_i)
    fg_rels = np.concatenate(fg_rels, 0)

    # Turn per-image box indices into batch-level indices.
    offset = {}
    for i, s, e in enumerate_by_image(im_inds):
        offset[i] = s
    for i, s, e in enumerate_by_image(fg_rels[:, 0]):
        fg_rels[s:e, 1:3] += offset[i]

    # Annotated pairs can never be sampled as background.
    is_cand[fg_rels[:, 1], fg_rels[:, 2]] = False

    num_fg = min(fg_rels.shape[0],
                 int(cfg.TRAIN.FG_REL_SIZE_PER_IM * num_img))
    if fg_rels.shape[0] > num_fg:
        fg_ind = np.random.choice(fg_rels.shape[0], num_fg, replace=False)
        fg_rels = fg_rels[fg_ind]

    sbj_bg_inds, obj_bg_inds = np.where(is_cand)
    bg_rels = np.stack(
        (im_inds[sbj_bg_inds].astype(sbj_bg_inds.dtype),
         sbj_bg_inds,
         obj_bg_inds,
         np.zeros_like(sbj_bg_inds)), -1)
    num_bg = min(
        bg_rels.shape[0],
        int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION * num_img
            - num_fg))
    if num_bg > 0:
        if bg_rels.shape[0] > num_bg:
            bg_ind = np.random.choice(bg_rels.shape[0], num_bg, replace=False)
            bg_rels = bg_rels[bg_ind]
        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels
    return rel_labels[:, :-1], rel_labels
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh=0.5, phrdet=False):
    """Return, for every prediction, the list of ground-truth indices it matches.

    A prediction matches a ground-truth relationship when their triplet rows
    are matched by ``intersect_2d`` and their boxes overlap enough.

    :param gt_triplets: ground-truth triplet rows
    :param pred_triplets: predicted triplet rows
    :param gt_boxes: one row per ground-truth triplet, 8 values: the first four
        are the subject box, the last four the object box
    :param pred_boxes: one row per prediction, same 8-value layout
    :param iou_thresh: minimum IoU for a box match
    :param phrdet: if True, compare the union box of subject and object;
        otherwise subject and object must each reach ``iou_thresh``
    :return: list with one entry per prediction, each a list of matching
        ground-truth indices (ints)
    """
    # The rows of `keeps` correspond to GT triplets, the columns to predictions.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    pred_to_gt = [[] for _ in range(pred_boxes.shape[0])]
    for gt_ind, gt_box, keep_inds in zip(
            np.where(gt_has_match)[0],
            gt_boxes[gt_has_match],
            keeps[gt_has_match],
    ):
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate on the box enclosing both subject and object.
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)

            gt_box_union = gt_box_union.astype(dtype=np.float32, copy=False)
            box_union = box_union.astype(dtype=np.float32, copy=False)
            # Pass the second box set positionally, as the other branch does;
            # the former `box_union=` keyword only works if bbox_overlaps
            # happens to name its parameter that way.
            inds = bbox_overlaps(
                gt_box_union[None], box_union)[0] >= iou_thresh
        else:
            gt_box = gt_box.astype(dtype=np.float32, copy=False)
            boxes = boxes.astype(dtype=np.float32, copy=False)
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        # Map positions within the kept subset back to prediction indices.
        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
def get_importance_factor(select_rois, sbj_gt_boxes, obj_gt_boxes, im_info):
    """Compute a per-RoI weighting factor, capped at 2.0.

    The RoI coordinates (columns 1 onward of ``select_rois``) are divided by
    ``im_info[0, 2]``, then each RoI is scored by how many subject and object
    ground-truth boxes it overlaps with IoU above 0.5.
    """
    scale = im_info[0, 2].data.cpu().numpy()
    boxes = select_rois[:, 1:] / scale

    sbj_hits = box_utils.bbox_overlaps(boxes, sbj_gt_boxes) > 0.5
    sbj_count = sbj_hits.astype(np.float32).sum(-1)
    obj_hits = box_utils.bbox_overlaps(boxes, obj_gt_boxes) > 0.5
    obj_count = obj_hits.astype(np.float32).sum(-1)

    pair_count = sbj_count + obj_count
    # Share of all hits that falls on each RoI; the floor avoids 0 / 0.
    theta = pair_count / np.maximum(pair_count.sum(), 1e-12)
    gamma = np.minimum(2.0, -((1 - 2 * theta)**5) * np.log(2 * theta))
    return gamma
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Append each image's proposal boxes to its roidb entry (in place)."""
    assert len(box_list) == len(roidb)
    for idx, entry in enumerate(roidb):
        proposals = box_list[idx]
        n_props = proposals.shape[0]
        map_dtype = entry['box_to_gt_ind_map'].dtype

        prop_overlaps = np.zeros(
            (n_props, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype)
        prop_to_gt = -np.ones((n_props), dtype=map_dtype)

        # All gt rois are used here, including those flagged as crowd;
        # proposals overlapping crowds are filtered out later
        # (see: _filter_crowd_proposals).
        gt_rows = np.nonzero(entry['gt_classes'] > 0)[0]
        if gt_rows.size > 0:
            gt_cls = entry['gt_classes'][gt_rows]
            ious = box_utils.bbox_overlaps(
                proposals.astype(dtype=np.float32, copy=False),
                entry['boxes'][gt_rows, :].astype(dtype=np.float32, copy=False))
            # Best-overlapping gt box for each proposal and the IoU with it.
            best_gt = ious.argmax(axis=1)
            best_iou = ious.max(axis=1)
            # Only proposals that touch some gt box get an entry.
            hit = np.nonzero(best_iou > 0)[0]
            prop_overlaps[hit, gt_cls[best_gt[hit]]] = best_iou[hit]
            prop_to_gt[hit] = gt_rows[best_gt[hit]]

        entry['boxes'] = np.append(
            entry['boxes'],
            proposals.astype(entry['boxes'].dtype, copy=False),
            axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((n_props), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((n_props), dtype=entry['seg_areas'].dtype))
        # gt_overlaps is stored sparse: densify, extend, then re-sparsify.
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), prop_overlaps, axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((n_props), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            prop_to_gt.astype(map_dtype, copy=False))
def _build_graph(boxes, iou_threshold):
    """Return a float32 adjacency matrix: 1 where pairwise IoU > threshold."""
    boxes_f32 = boxes.astype(dtype=np.float32, copy=False)
    pairwise_iou = box_utils.bbox_overlaps(boxes_f32, boxes_f32)
    adjacency = pairwise_iou > iou_threshold
    return adjacency.astype(np.float32)
def _compute_targets(entry):
    """Build the (class, tx, ty, tw, th) regression targets for one image."""
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']

    # One row per roi; rows that receive no target stay all-zero.
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Ground-truth rois: labelled and not flagged as crowd.
    is_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_inds = np.where(is_gt)[0]
    if len(gt_inds) == 0:
        # Nothing to regress towards in this image.
        return targets

    # Rois with enough overlap to be used as regression examples.
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # Each example regresses towards the gt roi it overlaps the most.
    ex_gt_overlaps = box_utils.bbox_overlaps(
        ex_rois.astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]

    # Class-agnostic regression files every example under class "1".
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        targets[ex_inds, 0] = 1
    else:
        targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
def _get_gt_bboxes_overlaps(self, entry):
    """Return the pairwise overlap matrix of ``entry['boxes']`` with itself."""
    boxes_f32 = entry['boxes'].astype(dtype=np.float32, copy=False)
    return box_utils.bbox_overlaps(boxes_f32, boxes_f32)
def _compute_targets(entry):
    """Return per-roi box regression targets, rows of (class, tx, ty, tw, th)."""
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']

    gt_mask = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_inds = np.where(gt_mask)[0]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # No usable ground truth: every target stays zero.
        return targets

    # Examples are the rois whose max overlap reaches the bbox threshold.
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    ex_rois = rois[ex_inds, :]
    candidate_gt = rois[gt_inds, :]

    ex_gt_overlaps = box_utils.bbox_overlaps(
        ex_rois.astype(dtype=np.float32, copy=False),
        candidate_gt.astype(dtype=np.float32, copy=False))
    # The most-overlapping gt roi becomes the example's regression target.
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]

    # With class-agnostic regression all examples share class "1".
    class_column = 1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds]
    targets[ex_inds, 0] = class_column
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
def get_rel_inds(self, det_rois, det_labels, roidb, im_info):
    """Choose the (image, subject, object) index triples to classify.

    Args:
        det_rois: array whose column 0 is the image index and whose remaining
            columns are the box coordinates.
        det_labels: forwarded to ``relpn_heads.rel_assignments`` in training.
        roidb: per-image entries; read for 'boxes', 'sbj_gt_boxes' and
            'obj_gt_boxes' when ``cfg.TRAIN.GT_BOXES`` is set at eval time.
        im_info: forwarded to ``relpn_heads.rel_assignments`` in training.

    Returns:
        In training, whatever ``relpn_heads.rel_assignments`` returns.
        Otherwise ``(rel_inds, None)`` where ``rel_inds`` has the columns
        (image id, subject index, object index).
    """
    num_img = int(det_rois[:, 0].max()) + 1
    im_inds = det_rois[:, 0].astype(np.int64)

    # TODO: Not support sgdet mode training yet.
    if self.training:
        return relpn_heads.rel_assignments(
            im_inds, det_rois, det_labels, roidb, im_info,
            num_sample_per_gt=1, filter_non_overlap=True)

    if cfg.TRAIN.GT_BOXES:
        # Evaluate exactly the annotated pairs. (The N x N candidate mask that
        # used to be built here was never read and has been removed.)
        # NOTE(review): the subject/object indices are per-image and are not
        # offset into the batch -- presumably evaluation uses one image per
        # batch; confirm with the caller.
        fg_rels = []
        for i in range(num_img):
            gt_boxes_i = roidb[i]['boxes']
            sbj_gt_boxes_i = roidb[i]['sbj_gt_boxes']
            obj_gt_boxes_i = roidb[i]['obj_gt_boxes']
            sbj_gt_inds_i = box_utils.bbox_overlaps(
                sbj_gt_boxes_i, gt_boxes_i).argmax(-1)
            obj_gt_inds_i = box_utils.bbox_overlaps(
                obj_gt_boxes_i, gt_boxes_i).argmax(-1)
            im_id_i = np.ones_like(sbj_gt_inds_i) * i
            gt_rels_i = np.stack(
                (im_id_i, sbj_gt_inds_i, obj_gt_inds_i), -1)
            fg_rels.append(gt_rels_i)
        rel_inds = np.concatenate(fg_rels, 0)
    else:
        # Candidates: distinct boxes of the same image with non-zero overlap.
        num_rois = im_inds.shape[0]
        is_cand = (im_inds[:, None] == im_inds[None])
        is_cand[np.arange(num_rois), np.arange(num_rois)] = False
        is_cand = (box_utils.bbox_overlaps(
            det_rois[:, 1:], det_rois[:, 1:]) > 0) & is_cand
        sbj_ind, obj_ind = np.where(is_cand)
        if len(sbj_ind) == 0:
            # Fall back to one dummy pair (box 0 with itself) so the result
            # is never empty.
            sbj_ind = np.zeros(1, dtype=np.int64)
            obj_ind = np.zeros(1, dtype=np.int64)
        rel_inds = np.stack(
            (det_rois[sbj_ind, 0].astype(sbj_ind.dtype), sbj_ind, obj_ind),
            -1)
    return rel_inds, None
def predbox_roi_iou(raw_roi, pred_box):
    """Return, for each roi, its largest overlap with any predicted box.

    An empty input is replaced by a single all-zero box so the overlap
    computation always receives at least one row.
    """
    placeholder_shape = (1, 4)
    if raw_roi.size == 0:
        raw_roi = np.zeros(placeholder_shape, dtype="float32")
    if pred_box.size == 0:
        pred_box = np.zeros(placeholder_shape, dtype="float32")
    overlaps = box_utils.bbox_overlaps(raw_roi, pred_box)
    return overlaps.max(axis=1)
def rel_samples(det_rois, edge_inds, im_info, roidb):
    """Label candidate edges as foreground/background and subsample them.

    Args:
        det_rois: array whose column 0 is the image index and whose remaining
            columns are the box coordinates.
        edge_inds: integer array with the columns (image id, subject roi
            index, object roi index).
        im_info: per-image info; element ``[i, 2]`` is multiplied onto the
            ground-truth boxes of image ``i``.
        roidb: per-image entries; read for 'sbj_gt_boxes', 'obj_gt_boxes' and
            'prd_gt_classes'.

    Returns:
        ``edge_inds`` rows with one extra last column holding the predicate
        class (0 for background), foreground rows first.
    """
    num_img = int(det_rois[:, 0].max() + 1)
    edge_indices_sets = [
        np.where(edge_inds[:, 0] == i)[0] for i in range(num_img)
    ]

    fg_rels = []
    bg_rels = []
    for i, edge_indices in enumerate(edge_indices_sets):
        edge_inds_i = edge_inds[edge_indices]
        im_scale = im_info[i, 2].data.cpu().numpy()
        sbj_gt_rois_i = roidb[i]['sbj_gt_boxes'] * im_scale
        obj_gt_rois_i = roidb[i]['obj_gt_boxes'] * im_scale
        prd_gt_classes_i = roidb[i]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            # Shift out of place: an in-place `+= 1` would modify the array
            # stored in roidb, so the labels would grow on every call.
            prd_gt_classes_i = prd_gt_classes_i + 1

        # An edge matches a gt relationship only as well as the weaker of its
        # subject and object overlaps.
        min_ious_i = np.minimum(
            box_utils.bbox_overlaps(
                det_rois[edge_inds_i[:, 1]][:, 1:], sbj_gt_rois_i),
            box_utils.bbox_overlaps(
                det_rois[edge_inds_i[:, 2]][:, 1:], obj_gt_rois_i))

        # Append a zero (background) class column, then fill in foreground.
        edge_rels_i = np.pad(edge_inds_i, ((0, 0), (0, 1)), 'constant')
        fg_inds_i = np.where(min_ious_i.max(-1) >= cfg.TRAIN.FG_THRESH)[0]
        edge_rels_i[fg_inds_i, -1] = prd_gt_classes_i[
            min_ious_i.argmax(-1)[fg_inds_i]]
        fg_rels.append(edge_rels_i[edge_rels_i[:, -1] > 0])
        bg_rels.append(edge_rels_i[edge_rels_i[:, -1] == 0])

    fg_rels = np.concatenate(fg_rels, 0)
    bg_rels = np.concatenate(bg_rels, 0)

    num_fg = min(fg_rels.shape[0],
                 int(cfg.TRAIN.FG_REL_SIZE_PER_IM * num_img))
    num_bg = min(
        bg_rels.shape[0],
        int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION * num_img
            - num_fg))

    if fg_rels.shape[0] > num_fg:
        fg_ind = npr.choice(fg_rels.shape[0], num_fg, replace=False)
        fg_rels = fg_rels[fg_ind]

    if num_bg > 0:
        if bg_rels.shape[0] > num_bg:
            bg_ind = npr.choice(bg_rels.shape[0], num_bg, replace=False)
            bg_rels = bg_rels[bg_ind]
        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels
    return rel_labels
def stats_calculator(all_proposals, gt_i):
    """Compute per-proposal match statistics against the best ground-truth box.

    Each proposal is paired with the ground-truth box it overlaps the most.

    Args:
        all_proposals: array of proposal boxes, one (x1, y1, x2, y2) row each.
        gt_i: array of ground-truth boxes in the same layout.

    Returns:
        Tuple ``(iou, center_distance, intersection_over_gt)``:
            * ``iou``: best IoU of each proposal, as returned by
              ``box_utils.bbox_overlaps``;
            * ``center_distance``: float32 distance between the proposal and
              ground-truth centers, divided by the length of the vector
              (sum of half-widths, sum of half-heights);
            * ``intersection_over_gt``: float32 intersection area divided by
              the ground-truth area.
    """
    iou_mat = box_utils.bbox_overlaps(all_proposals, gt_i)
    max_inds = np.argmax(iou_mat, axis=1)
    max_element = np.max(iou_mat, axis=1)
    # Keeps every proposal with a valid (non-negative) best IoU.
    thrsh_inds = np.where(max_element >= 0)
    max_inds = max_inds[thrsh_inds]
    all_proposals = all_proposals[thrsh_inds]
    max_element = max_element[thrsh_inds]

    matched_gt = gt_i[max_inds]

    # Widths/heights use the inclusive-pixel (+1) convention.
    gt_width = matched_gt[:, 2] - matched_gt[:, 0] + 1
    gt_height = matched_gt[:, 3] - matched_gt[:, 1] + 1
    pp_width = all_proposals[:, 2] - all_proposals[:, 0] + 1
    pp_height = all_proposals[:, 3] - all_proposals[:, 1] + 1

    # Real box centers. The earlier code used (width / 2, height / 2), which
    # ignores where the box sits and so measured a size difference instead of
    # a center distance.
    gt_cx = (matched_gt[:, 0] + matched_gt[:, 2]) / 2
    gt_cy = (matched_gt[:, 1] + matched_gt[:, 3]) / 2
    pp_cx = (all_proposals[:, 0] + all_proposals[:, 2]) / 2
    pp_cy = (all_proposals[:, 1] + all_proposals[:, 3]) / 2
    distance = np.sqrt(np.square(gt_cx - pp_cx) + np.square(gt_cy - pp_cy))

    # DoC: Distance of Centers (normalized).
    dis_width = gt_width / 2 + pp_width / 2
    dis_height = gt_height / 2 + pp_height / 2
    center_point_distance = distance / np.sqrt(
        np.square(dis_width) + np.square(dis_height))

    # Intersection area; zero unless the boxes overlap on both axes.
    iw = (np.minimum(all_proposals[:, 2], matched_gt[:, 2])
          - np.maximum(all_proposals[:, 0], matched_gt[:, 0]) + 1)
    ih = (np.minimum(all_proposals[:, 3], matched_gt[:, 3])
          - np.maximum(all_proposals[:, 1], matched_gt[:, 1]) + 1)
    intersect = np.where((iw > 0) & (ih > 0), iw * ih, 0)

    gt_area = gt_height * gt_width
    assert np.all(gt_area > 0)
    # Intersection Over GT.
    iou_over_gt = intersect / gt_area

    center_point_distance = np.asarray(center_point_distance, dtype=np.float32)
    iou_over_gt = np.asarray(iou_over_gt, dtype=np.float32)
    return max_element, center_point_distance, iou_over_gt
def _merge_compute_boxes_into_roidb(roidb, box_list):
    """Replace each roidb entry's boxes with the given boxes (in place).

    Unlike an append-style merge, the per-box fields of the entry are rebuilt
    so that they describe exactly the boxes in ``box_list``.
    """
    assert len(box_list) == len(roidb)
    for idx, entry in enumerate(roidb):
        new_boxes = box_list[idx]  # gt + det
        n_new = new_boxes.shape[0]
        map_dtype = entry['box_to_gt_ind_map'].dtype

        new_overlaps = np.zeros(
            (n_new, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype)
        new_to_gt = -np.ones((n_new), dtype=map_dtype)

        # All gt rois are used here, including those flagged as crowd;
        # boxes overlapping crowds are filtered out later
        # (see: _filter_crowd_proposals).
        gt_rows = np.nonzero(entry['gt_classes'] > 0)[0]
        if gt_rows.size > 0:
            gt_cls = entry['gt_classes'][gt_rows]
            ious = box_utils.bbox_overlaps(
                new_boxes.astype(dtype=np.float32, copy=False),
                entry['boxes'][gt_rows, :].astype(dtype=np.float32, copy=False))
            # Best-overlapping gt box per new box and the IoU with it.
            best_gt = ious.argmax(axis=1)
            best_iou = ious.max(axis=1)
            # Only boxes that touch some gt box are recorded.
            hit = np.nonzero(best_iou > 0)[0]
            new_overlaps[hit, gt_cls[best_gt[hit]]] = best_iou[hit]
            new_to_gt[hit] = gt_rows[best_gt[hit]]

        entry['boxes'] = new_boxes.astype(entry['boxes'].dtype, copy=False)
        entry['box_to_gt_ind_map'] = new_to_gt.astype(map_dtype, copy=False)

        n_mapped = len(entry['box_to_gt_ind_map'])
        matched_ids = np.where(entry['box_to_gt_ind_map'] > -1)[0]
        matched_gt = entry['box_to_gt_ind_map'][matched_ids]

        # Matched boxes inherit the class of their gt box; the rest get -1.
        inherited_classes = -np.ones(n_mapped)
        inherited_classes[matched_ids] = entry['gt_classes'][matched_gt]
        entry['gt_classes'] = inherited_classes

        entry['seg_areas'] = np.zeros(
            (n_new), dtype=entry['seg_areas'].dtype)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(new_overlaps)

        # Matched boxes inherit the crowd flag of their gt box; the rest get 1.
        inherited_crowd = np.ones(n_mapped)
        inherited_crowd[matched_ids] = entry['is_crowd'][matched_gt]
        entry['is_crowd'] = inherited_crowd
def _sample_pairs(self, det_rois, edge_inds, im_info, roidb):
    """Sample positive and negative roi pairs for pair pruning.

    A pair's score is the product of its subject and object overlaps with a
    ground-truth relationship, maximised over relationships.

    Returns:
        ``(keep_inds, labels)``: indices into ``edge_inds`` of the sampled
        pairs (positives first) and a matching array of 1s and 0s.
    """
    scale = im_info[2].data.cpu().numpy()
    sbj_gt_rois = roidb['sbj_gt_boxes'] * scale
    obj_gt_rois = roidb['obj_gt_boxes'] * scale

    sbj_ious = box_utils.bbox_overlaps(
        det_rois[edge_inds[:, 1]][:, 1:], sbj_gt_rois)
    obj_ious = box_utils.bbox_overlaps(
        det_rois[edge_inds[:, 2]][:, 1:], obj_gt_rois)
    p_ious = (sbj_ious * obj_ious).max(-1)

    fg_inds = np.where(p_ious >= cfg.TRAIN.PRUNE_PAIRS_POSTIVE_OVERLAP)[0]
    bg_inds = np.where(p_ious < cfg.TRAIN.PRUNE_PAIRS_NEGATIVE_OVERLAP)[0]

    # Positives take at most their configured share of the batch; negatives
    # fill what is left.
    fg_budget = (cfg.TRAIN.PRUNE_PAIRS_FG_FRACTION
                 * cfg.TRAIN.PRUNE_PAIRS_BATCHSIZE)
    num_fg = min(fg_inds.shape[0], fg_budget)
    num_bg = min(bg_inds.shape[0], cfg.TRAIN.PRUNE_PAIRS_BATCHSIZE - num_fg)

    if fg_inds.shape[0] > num_fg:
        fg_inds = npr.choice(fg_inds, size=int(num_fg), replace=False)
    if bg_inds.shape[0] > num_bg:
        bg_inds = npr.choice(bg_inds, size=int(num_bg), replace=False)

    keep_inds = np.concatenate((fg_inds, bg_inds), 0)
    labels = np.concatenate(
        (np.ones_like(fg_inds), np.zeros_like(bg_inds)), 0)
    return keep_inds, labels
def _do_test(b1, b2):
    """Check that box_utils.bbox_overlaps agrees with the COCO API IoU."""
    # Reference values: the COCO API works on xywh boxes and needs one
    # crowd flag per box in the second set (all "not crowd" here).
    crowd_flags = [int(False)] * b2.shape[0]
    coco_ious = COCOmask.iou(
        box_utils.xyxy_to_xywh(b1), box_utils.xyxy_to_xywh(b2), crowd_flags)

    # Values under test: the cython implementation on the xyxy boxes.
    cython_iou = box_utils.bbox_overlaps(b1, b2)

    # The two implementations should agree to five decimals.
    np.testing.assert_array_almost_equal(cython_iou, coco_ious, decimal=5)
def _do_test(b1, b2):
    """Assert that the cython IoU matches the COCO API IoU for two box sets."""
    expected_decimals = 5

    # IoU from the cython implementation (boxes in xyxy format).
    cython_iou = box_utils.bbox_overlaps(b1, b2)

    # IoU from the COCO API, which expects xywh boxes plus a per-box crowd
    # flag for the second set.
    b1_xywh = box_utils.xyxy_to_xywh(b1)
    b2_xywh = box_utils.xyxy_to_xywh(b2)
    not_crowd = [int(False) for _ in range(b2.shape[0])]
    coco_ious = COCOmask.iou(b1_xywh, b2_xywh, not_crowd)

    np.testing.assert_array_almost_equal(
        cython_iou, coco_ious, decimal=expected_decimals)
def box_filter(boxes, must_overlap=False):
    """Return the index pairs of boxes that may form a relation.

    Args:
        boxes: array of boxes, one row each.
        must_overlap: if True, keep only pairs of distinct boxes with non-zero
            overlap; when no such pair exists, fall back to all pairs.

    Returns:
        Integer array with one (first index, second index) row per candidate
        pair. A box is never paired with itself.
    """
    n_cands = boxes.shape[0]

    if must_overlap:
        boxes_f32 = boxes.astype(np.float32)
        overlaps = box_utils.bbox_overlaps(boxes_f32, boxes_f32) > 0
        np.fill_diagonal(overlaps, 0)
        possible_boxes = np.column_stack(np.where(overlaps))
        if possible_boxes.size > 0:
            return possible_boxes

    # Every ordered pair of distinct boxes. The builtin `bool` is used because
    # the `np.bool` alias was removed in NumPy 1.24.
    all_possib = ~np.eye(n_cands, dtype=bool)
    return np.column_stack(np.where(all_possib))
def get_proposal_clusters(all_rois, proposals, im_labels):
    """Assign every RoI to its best-overlapping pseudo ground-truth box.

    Returns the per-RoI labels, classification loss weights and gt assignment
    (-1 for background), followed by the bbox targets and their inside and
    outside weights (single-element placeholder arrays when
    ``cfg.MODEL.WITH_FRCNN`` is off).
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'

    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']

    # overlaps: (rois x gt_boxes)
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Below FG_THRESH a RoI is background; below BG_THRESH it is ignored
    # (zero loss weight).
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0
    labels[bg_inds] = 0

    if not cfg.MODEL.WITH_FRCNN:
        bbox_targets = np.array([0])
        bbox_inside_weights = np.array([0])
        bbox_outside_weights = np.array([0])
    else:
        # Targets are computed against the assigned gt box of every RoI, so
        # this must run before background assignments are reset to -1.
        bbox_targets = _compute_targets(
            all_rois, gt_boxes[gt_assignment, :], labels)
        bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_targets)
        has_target = np.array(
            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
        bbox_outside_weights = has_target * cls_loss_weights.reshape(-1, 1)

    gt_assignment[bg_inds] = -1

    return (labels, cls_loss_weights, gt_assignment, bbox_targets,
            bbox_inside_weights, bbox_outside_weights)
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Assign RoIs to pseudo ground-truth boxes and summarise each cluster.

    Args:
        all_rois: array of RoI boxes.
        proposals: dict with 'gt_boxes', 'gt_classes' and 'gt_scores'; only
            column 0 of the classes and scores is read.
        im_labels: image-level label matrix; must have exactly one row.
        cls_prob: per-RoI class probabilities, indexed as [roi, class].

    Returns:
        Tuple ``(labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
        pc_count, img_cls_loss_weights)``. The first three are per RoI
        (``gt_assignment`` is -1 for background); the last four hold, per gt
        box, its label, the mean probability of that label over its assigned
        RoIs, the number of assigned RoIs and the sum of their loss weights.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'

    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Below FG_THRESH a RoI is background; below BG_THRESH it is ignored
    # (zero loss weight).
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0
    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    num_gt = gt_boxes.shape[0]
    img_cls_loss_weights = np.zeros(num_gt, dtype=np.float32)
    pc_probs = np.zeros(num_gt, dtype=np.float32)
    pc_labels = np.zeros(num_gt, dtype=np.int32)
    pc_count = np.zeros(num_gt, dtype=np.int32)

    # `range` instead of `xrange`, which does not exist on Python 3.
    for i in range(num_gt):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        # NOTE(review): a gt box with no assigned RoI yields NaN here (mean
        # of an empty selection) -- confirm the caller tolerates that.
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return (labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
            pc_count, img_cls_loss_weights)
def _calculate_gt_bbox_overlaps(self, entry, threshold, is_same_cls=True):
    """Count pairs of boxes in ``entry`` whose overlap exceeds ``threshold``.

    Used to find significantly overlapping boxes that could be merged into
    one.

    Args:
        entry: dict with 'boxes' and, when ``is_same_cls`` is True,
            'gt_classes'.
        threshold: overlap value a pair must exceed to be counted.
        is_same_cls: if True, only pairs with equal class are counted.

    Returns:
        Number of unordered box pairs above the threshold.
    """
    boxes_f32 = entry['boxes'].astype(dtype=np.float32, copy=False)
    gt_to_gt_overlaps = box_utils.bbox_overlaps(boxes_f32, boxes_f32)

    # Keep the strict upper triangle: each pair once, no self-overlap.
    gt_to_gt_overlaps = np.triu(gt_to_gt_overlaps, k=1)

    if is_same_cls:
        gt_classes = entry['gt_classes']
        same_class = gt_classes[:, np.newaxis] == gt_classes[np.newaxis, :]
        gt_to_gt_overlaps = gt_to_gt_overlaps[same_class]

    return np.sum(gt_to_gt_overlaps > threshold)
def _merge_paired_boxes_into_roidb(roidb, sbj_box_list, obj_box_list):
    """Attach subject/object box pairs and their gt overlaps to each entry.

    Every pair is scored against the entry's ground-truth relationships: the
    subject and object boxes separately, and the pair as the minimum of the
    two overlaps. The entry is updated in place.
    """
    assert len(sbj_box_list) == len(obj_box_list) == len(roidb)
    for idx, entry in enumerate(roidb):
        sbj_boxes = sbj_box_list[idx]
        obj_boxes = obj_box_list[idx]
        assert sbj_boxes.shape[0] == obj_boxes.shape[0]
        num_pairs = sbj_boxes.shape[0]

        def _empty_table(key):
            # Zero table shaped (num_pairs, number of columns of entry[key]).
            return np.zeros(
                (num_pairs, entry[key].shape[1]), dtype=entry[key].dtype)

        sbj_table = _empty_table('sbj_gt_overlaps')
        obj_table = _empty_table('obj_gt_overlaps')
        prd_table = _empty_table('prd_gt_overlaps')
        map_dtype = entry['pair_to_gt_ind_map'].dtype
        pair_to_gt = -np.ones((num_pairs), dtype=map_dtype)

        pair_gt_inds = np.arange(entry['prd_gt_classes'].shape[0])
        if len(pair_gt_inds) > 0:
            sbj_gt_classes = entry['sbj_gt_classes'][pair_gt_inds]
            obj_gt_classes = entry['obj_gt_classes'][pair_gt_inds]
            prd_gt_classes = entry['prd_gt_classes'][pair_gt_inds]

            sbj_ious = box_utils.bbox_overlaps(
                sbj_boxes.astype(dtype=np.float32, copy=False),
                entry['sbj_gt_boxes'][pair_gt_inds, :].astype(
                    dtype=np.float32, copy=False))
            obj_ious = box_utils.bbox_overlaps(
                obj_boxes.astype(dtype=np.float32, copy=False),
                entry['obj_gt_boxes'][pair_gt_inds, :].astype(
                    dtype=np.float32, copy=False))
            # A pair matches a relationship only as well as its weaker box.
            pair_ious = np.minimum(sbj_ious, obj_ious)

            # Best gt match and its overlap. The `>= 0` test keeps every row,
            # including those with zero overlap.
            sbj_best = sbj_ious.argmax(axis=1)
            sbj_max = sbj_ious.max(axis=1)
            sbj_rows = np.where(sbj_max >= 0)[0]

            obj_best = obj_ious.argmax(axis=1)
            obj_max = obj_ious.max(axis=1)
            obj_rows = np.where(obj_max >= 0)[0]

            pair_best = pair_ious.argmax(axis=1)
            pair_max = pair_ious.max(axis=1)
            pair_rows = np.where(pair_max >= 0)[0]

            # Store each max overlap under the class of the matched gt.
            sbj_table[sbj_rows, sbj_gt_classes[sbj_best[sbj_rows]]] = \
                sbj_max[sbj_rows]
            obj_table[obj_rows, obj_gt_classes[obj_best[obj_rows]]] = \
                obj_max[obj_rows]
            prd_table[pair_rows, prd_gt_classes[pair_best[pair_rows]]] = \
                pair_max[pair_rows]
            pair_to_gt[pair_rows] = pair_gt_inds[pair_best[pair_rows]]

        entry['sbj_boxes'] = sbj_boxes.astype(
            entry['sbj_gt_boxes'].dtype, copy=False)
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(sbj_table)
        entry['obj_boxes'] = obj_boxes.astype(
            entry['obj_gt_boxes'].dtype, copy=False)
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(obj_table)
        entry['prd_gt_classes'] = -np.ones(
            (num_pairs), dtype=entry['prd_gt_classes'].dtype)
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(prd_table)
        entry['pair_to_gt_ind_map'] = pair_to_gt.astype(map_dtype, copy=False)
def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    """Build per-field classification labels and box targets for one image.

    Anchors are labelled from their overlap with ``gt_boxes``, regression
    targets are computed for the foreground anchors, and the flat results are
    split into one blob dict per element of ``foas``.

    Returns:
        ``(blobs_out, out_num_fg, out_num_bg)``: a list with one dict per
        field (keys 'retnet_cls_labels', 'retnet_roi_bbox_targets',
        'retnet_roi_fg_bbox_locs') and two one-element arrays derived from the
        foreground and background anchor counts.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # Every anchor is kept; no inside-image filtering is applied here.
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label>=1 is a positive class, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    # NOTE(review): the source lost its indentation; the statements below are
    # placed outside the `if` above on the assumption that this follows the
    # usual layout. In that layout anchor_to_gt_max / anchor_to_gt_argmax are
    # only bound when gt_boxes is non-empty -- confirm callers never pass an
    # image without gt boxes.
    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    # Regression targets exist for foreground anchors only; others stay zero.
    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        # Each field is square and consumes field_size**2 consecutive anchors.
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, 1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4, height, width)
        _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        # NOTE(review): inds_4d is the tuple returned by np.where on a 4-D
        # array, so M is its length (4), not the number of positives, and the
        # branch below always runs. With no positives it just builds empty
        # (0, 4) arrays again.
        M = len(inds_4d)
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                # Shift to a zero-based class index; a single shared index is
                # used when box regression is not class specific.
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        blobs_out.append(
            dict(
                # Labels are cropped to the image extent at this stride.
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))
    # Counts are offset by one, so they are never zero.
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Every foreground roi (``labels_int32 > 0``) is matched to the non-crowd
    ground-truth polygon whose enclosing box overlaps it the most, and that
    polygon is rasterized inside the roi as an M x M binary target, with
    M = cfg.MRCNN.RESOLUTION.

    Args:
        blobs: dict containing 'labels_int32'. Updated in place with
            'mask_rois', 'roi_has_mask_int32' and 'masks_int32'.
        sampled_boxes: roi boxes indexed in step with blobs['labels_int32'].
            Left unmodified.
        roidb: one roidb entry; 'gt_classes', 'is_crowd' and 'segms' are read.
        im_scale: factor applied to the roi coordinates before they are stored.
        batch_idx: value placed in the first column of 'mask_rois'.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation. Indexing with an index array yields a
        # fresh array, so the in-place scaling below cannot touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i, (roi_fg, fg_polys_ind) in enumerate(zip(rois_fg, fg_polys_inds)):
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(polys_gt[fg_polys_ind], roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen), the network still needs a
        # non-empty blob: take the first bg roi, give it an all -1's mask
        # (ignore label) and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # A single row selected by an integer is a *view*; copy it so that the
        # in-place scaling below does not rescale the caller's sampled_boxes.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def evaluate_box_proposals(json_dataset, roidb, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    A faster alternative to the official COCO API recall evaluation that
    produces slightly different results.

    Args:
        json_dataset: not used by this function.
        roidb: iterable of entries providing 'gt_classes', 'is_crowd', 'boxes'
            and 'seg_areas'. Rows with gt_classes == 0 are the proposals.
        thresholds: IoU thresholds; defaults to 0.5, 0.55, ..., 0.95.
        area: name of the ground-truth area range to evaluate.
        limit: if given, only the first `limit` proposals per entry are used.

    Returns:
        dict with 'ar', 'recalls', 'thresholds', 'gt_overlaps', 'num_pos'.
    """
    # Inclusive (min, max) bounds on the gt segmentation area for each name.
    area_bounds = {
        'all': (0**2, 1e5**2),
        'small': (0**2, 32**2),
        'medium': (32**2, 96**2),
        'large': (96**2, 1e5**2),
        '96-128': (96**2, 128**2),
        '128-256': (128**2, 256**2),
        '256-512': (256**2, 512**2),
        '512-inf': (512**2, 1e5**2),
    }
    assert area in area_bounds, 'Unknown area range: {}'.format(area)
    min_area, max_area = area_bounds[area]

    # Best-match IoU of every evaluated gt box, one chunk per image.
    overlap_chunks = [np.zeros(0)]
    num_pos = 0
    for entry in roidb:
        # Non-crowd ground truth whose area lies inside the requested range
        gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        in_range = np.where((gt_areas >= min_area) & (gt_areas <= max_area))[0]
        gt_boxes = gt_boxes[in_range, :]
        num_pos += len(in_range)

        # Proposals are the rows labelled with class 0
        boxes = entry['boxes'][np.where(entry['gt_classes'] == 0)[0], :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))

        num_gt = gt_boxes.shape[0]
        matched = np.zeros((num_gt))
        # Greedy one-to-one matching: repeatedly take the best covered gt box
        # together with the proposal that covers it best, then retire both.
        for j in range(min(boxes.shape[0], num_gt)):
            best_per_gt = overlaps.max(axis=0)
            gt_ind = best_per_gt.argmax()
            gt_ovr = best_per_gt[gt_ind]
            assert gt_ovr >= 0
            box_ind = overlaps[:, gt_ind].argmax()
            matched[j] = overlaps[box_ind, gt_ind]
            assert matched[j] == gt_ovr
            # -1 marks the proposal row and the gt column as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        overlap_chunks.append(matched)

    gt_overlaps = np.sort(np.hstack(overlap_chunks))
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # recall at each IoU threshold = fraction of gt boxes matched at >= t
    denom = float(num_pos)
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / denom
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos,
    }
def _forward(self, data, im_info, do_vis=False, dataset_name=None, roidb=None, use_gt_labels=False, **rpn_kwargs):
    """Run the detection branch and collect per-roi outputs in a dict.

    Args:
        data: image batch fed to ``self.Conv_Body``; ``get_device()`` is
            called on it.
        im_info: per-image info; column 2 is read as the image scale.
        do_vis: not used in this function.
        dataset_name: serialized dataset name, or None to fall back to the
            first entry of cfg.TRAIN.DATASETS / cfg.TEST.DATASETS.
        roidb: serialized roidb entries (deserialized only in training mode).
        use_gt_labels: when set, 'det_labels_gt' is added to the result.
        **rpn_kwargs: not used in this function.

    Returns:
        dict with 'det_rois', 'det_dists', 'det_scores', 'blob_conv',
        'det_boxes_all', 'det_labels', plus 'det_labels_gt' when training or
        when ``use_gt_labels`` is set.

    NOTE(review): the branch taken when not training and ``roidb`` is given
    reads ``gt_rois`` / ``gt_classes`` (only assigned under
    ``self.training``), never assigns ``det_rois`` / ``det_scores``, and sets
    ``det_boxes_all = None`` before ``det_boxes_all.shape`` is read below.
    As written that path looks unusable -- confirm before relying on it.
    """
    im_data = data
    if self.training:
        # if not isinstance(roidb[0], np.array):
        #     roidb = roidb[0]
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))  # only support one gpu
    if dataset_name is not None:
        dataset_name = blob_utils.deserialize(dataset_name)
    else:
        dataset_name = cfg.TRAIN.DATASETS[0] if self.training else cfg.TEST.DATASETS[0]  # assuming only one dataset per run

    device_id = im_data.get_device()

    return_dict = {}  # A dict to collect return variables

    blob_conv = self.Conv_Body(im_data)
    # if not cfg.MODEL.USE_REL_PYRAMID:
    #     blob_conv_prd = self.Prd_RCNN.Conv_Body(im_data)

    if self.training:
        # Stack the scaled gt boxes of all images as (image_index, box...) rows,
        # together with their class labels.
        gt_rois = np.empty((0, 5), dtype=np.float32)
        gt_classes = np.empty((0), dtype=np.int64)
        for i, r in enumerate(roidb):
            rois_i = r['boxes'] * im_info[i, 2]
            rois_i = np.hstack((i * blob_utils.ones((rois_i.shape[0], 1)), rois_i))
            gt_rois = np.append(gt_rois, rois_i, axis=0)
            gt_classes = np.append(gt_classes, r['gt_classes'], axis=0)

    if self.training or roidb is None:
        rpn_ret = self.RPN(blob_conv, im_info, roidb)

    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
        # extra blobs that are used for RPN proposals, but not for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]
        # if not cfg.MODEL.USE_REL_PYRAMID:
        #     blob_conv_prd = blob_conv_prd[-self.num_roi_levels:]
        # else:
        #     blob_conv_prd = self.RelPyramid(blob_conv)

    if self.training or roidb is None:
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret, use_relu=True)
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret, use_relu=True)
        cls_score, bbox_pred = self.Box_Outs(box_feat)

    # now go through the predicate branch
    # NOTE(review): use_relu is assigned but never read in this function.
    use_relu = False if cfg.MODEL.NO_FC7_RELU else True
    if self.training:
        score_thresh = cfg.TEST.SCORE_THRESH
        cls_score = F.softmax(cls_score, -1)
        # Lower the score threshold in steps of 0.01 until at least one
        # detection with non-zero area survives.
        while score_thresh >= -1e-06:  # a negative value very close to 0.0
            det_rois, det_labels, det_scores, det_dists, det_boxes_all = \
                self.prepare_det_rois(rpn_ret['rois'], cls_score, bbox_pred, im_info, score_thresh)
            # Drop degenerate rois (columns 1..4 hold the box coordinates)
            real_area = (det_rois[:, 3] - det_rois[:, 1]) * (det_rois[:, 4] - det_rois[:, 2])
            non_zero_area_inds = np.where(real_area > 0)[0]
            det_rois = det_rois[non_zero_area_inds]
            det_labels = det_labels[non_zero_area_inds]
            det_scores = det_scores[non_zero_area_inds]
            det_dists = det_dists[non_zero_area_inds]
            det_boxes_all = det_boxes_all[non_zero_area_inds]
            # rel_ret = self.RelPN(det_rois, det_labels, det_scores, im_info, dataset_name, roidb)
            valid_len = len(det_rois)
            if valid_len > 0:
                break
            logger.info('Got {} det_rois when score_thresh={}, changing to {}'.format(
                valid_len, score_thresh, score_thresh - 0.01))
            score_thresh -= 0.01
        det_labels_gt = []
        # IoU between detections and gt boxes, zeroed for pairs that belong to
        # different images (column 0 is the image index).
        ious = box_utils.bbox_overlaps(det_rois[:, 1:], gt_rois[:, 1:]) * \
            (det_rois[:, 0][:,None] == gt_rois[:, 0][None, :])
        # Each detection takes the class of its best gt box, or 0 when the
        # best IoU is below cfg.TRAIN.FG_THRESH.
        det_labels_gt = gt_classes[ious.argmax(-1)]
        det_labels_gt[ious.max(-1) < cfg.TRAIN.FG_THRESH] = 0
    else:
        if roidb is not None:
            # raise FError('not support this mode!')
            # assert len(roidb) == 1
            im_scale = im_info.data.numpy()[:, 2][0]
            im_w = im_info.data.numpy()[:, 1][0]
            im_h = im_info.data.numpy()[:, 0][0]

            fpn_ret = {'gt_rois': gt_rois}
            if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
                lvl_min = cfg.FPN.ROI_MIN_LEVEL
                lvl_max = cfg.FPN.ROI_MAX_LEVEL
                rois_blob_names = ['gt_rois']
                for rois_blob_name in rois_blob_names:
                    # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
                    target_lvls = fpn_utils.map_rois_to_fpn_levels(
                        fpn_ret[rois_blob_name][:, 1:5], lvl_min, lvl_max)
                    fpn_utils.add_multilevel_roi_blobs(
                        fpn_ret, rois_blob_name, fpn_ret[rois_blob_name], target_lvls,
                        lvl_min, lvl_max)
            # NOTE(review): fpn_ret only holds 'gt_rois' blobs while
            # rois_name='det_rois' is requested here -- confirm.
            det_feats = self.Box_Head(blob_conv, fpn_ret, rois_name='det_rois', use_relu=True)
            det_dists, _ = self.Box_Outs(det_feats)
            det_boxes_all = None
            if use_gt_labels:
                det_labels_gt = gt_classes
                det_labels = gt_classes
        else:
            # Same threshold-lowering loop as in training, applied to the
            # scores returned by Box_Outs as they are.
            score_thresh = cfg.TEST.SCORE_THRESH
            while score_thresh >= -1e-06:  # a negative value very close to 0.0
                det_rois, det_labels, det_scores, det_dists, det_boxes_all = \
                    self.prepare_det_rois(rpn_ret['rois'], cls_score, bbox_pred, im_info, score_thresh)
                real_area = (det_rois[:, 3] - det_rois[:, 1]) * (det_rois[:, 4] - det_rois[:, 2])
                non_zero_area_inds = np.where(real_area > 0)[0]
                det_rois = det_rois[non_zero_area_inds]
                det_labels = det_labels[non_zero_area_inds]
                det_scores = det_scores[non_zero_area_inds]
                det_dists = det_dists[non_zero_area_inds]
                det_boxes_all = det_boxes_all[non_zero_area_inds]
                # rel_ret = self.RelPN(det_rois, det_labels, det_scores, im_info, dataset_name, roidb)
                valid_len = len(det_rois)
                if valid_len > 0:
                    break
                logger.info('Got {} det_rois when score_thresh={}, changing to {}'.format(
                    valid_len, score_thresh, score_thresh - 0.01))
                score_thresh -= 0.01

    return_dict['det_rois'] = det_rois
    num_rois = det_rois.shape[0]
    # Non-tensor class distributions are converted and moved to the input's device
    if not isinstance(det_dists, torch.Tensor):
        assert det_dists.shape[0] == num_rois
        det_dists = torch.from_numpy(det_dists).float().cuda(device_id)

    return_dict['det_dists'] = det_dists
    return_dict['det_scores'] = det_scores
    return_dict['blob_conv'] = blob_conv
    return_dict['det_boxes_all'] = det_boxes_all
    assert det_boxes_all.shape[0] == num_rois
    return_dict['det_labels'] = det_labels
    # return_dict['blob_conv_prd'] = blob_conv_prd
    if self.training or use_gt_labels:
        return_dict['det_labels_gt'] = det_labels_gt

    return return_dict
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    """Build RetinaNet classification labels and box targets for one image.

    Anchor labels follow the convention: gt class (>= 1) is positive, 0 is
    negative and -1 is ignored. An image without ground-truth boxes is
    handled by treating every anchor as having zero overlap, so no anchor
    becomes foreground.

    Args:
        foas: fields of anchors; ``field_size`` and ``stride`` are read from
            each one, and each is expected to own ``field_size ** 2``
            consecutive rows of `all_anchors`.
        all_anchors: array of anchor boxes, one per row.
        gt_boxes: ground-truth boxes, one per row (may be empty).
        gt_classes: class label of each ground-truth box.
        im_width: image width, used to crop the label map to ``im_width / stride``.
        im_height: image height, used likewise.

    Returns:
        (blobs_out, out_num_fg, out_num_bg), where blobs_out holds one dict per
        field of anchors with 'retnet_cls_labels', 'retnet_roi_bbox_targets'
        and 'retnet_roi_fg_bbox_locs'.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # All anchors are kept; nothing is filtered out as "outside the image".
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label >= 1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    # Defaults for an image without gt boxes: zero overlap everywhere. These
    # names are read below regardless of whether the image has ground truth.
    anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    if len(gt_boxes) > 0:
        # Overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if num_fg > 0:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        # labels reshaped to (1, 1, H, W); bbox targets to (1, 4, H, W)
        _labels = labels[start_idx:end_idx].reshape((1, 1, H, W))
        _bbox_targets = bbox_targets[start_idx:end_idx, :].reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        start_idx = end_idx

        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        im_inds, _, y, x = np.where(_labels > 0)
        # One row per positive location; both stay (0, 4) when there is none.
        _roi_bbox_targets = np.zeros((len(im_inds), 4))
        _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
        lbls = _labels[im_inds, :, y, x]
        for i, lbl in enumerate(lbls):
            # Zero-based class index, or 0 for class-agnostic box regression
            cls_idx = lbl[0] - 1
            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                cls_idx = 0
            assert cls_idx >= 0 and cls_idx < num_classes, 'label out of the range'
            _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
            _roi_fg_bbox_locs[i, :] = np.array([[0, cls_idx, y[i], x[i]]])

        # No zero padding is added here: emptiness is per field of anchors and
        # is meant to be checked after all fields of an image are gathered.
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
def forward(self, proposals, gt_boxes):
    """Sample proposals and attach their training targets.

    Args:
        proposals (Tensor): Region proposals in (0, x1, y1, x2, y2) format
            coming from RPN.
        gt_boxes (Tensor): Ground-truth boxes in
            (x1, y1, x2, y2, class, person_id) format.

    Returns:
        proposals (Tensor[N, 5]): Sampled proposals, foreground first.
        cls_labels (Tensor[N]): Classification labels of the proposals.
        pid_labels (Tensor[N]): Person IDs of the proposals.
        deltas (Tensor[N, num_classes * 4]): Regression deltas of the proposals.
        inside_weights, outside_weights (Tensor): Used to calculate
            smooth_l1_loss.
    """
    assert torch.all(proposals[:, 0] == 0), "Single batch only."

    def _subsample(candidates, count):
        # Keep `count` of the candidate indices: the leading ones when the
        # DEBUG environment variable is set, a random choice otherwise.
        if candidates.numel() == 0:
            return candidates
        if "DEBUG" in os.environ:
            return candidates[:count]
        return torch_rand_choice(candidates, count)

    # Ground-truth boxes take part in sampling as extra candidates; they get
    # the same leading 0 column as the RPN proposals.
    gt_as_rois = torch.cat(
        (gt_boxes.new(gt_boxes.shape[0], 1).zero_(), gt_boxes[:, :4]), dim=1)
    proposals = torch.cat((proposals, gt_as_rois), dim=0)

    # Best-matching ground truth of every candidate
    iou = bbox_overlaps(proposals[:, 1:5], gt_boxes[:, :4])
    best_iou, best_gt = iou.max(dim=1)
    cls_labels = gt_boxes[best_gt, 4]
    pid_labels = gt_boxes[best_gt, 5]

    batch_size = cfg.TRAIN.BATCH_SIZE
    fg_quota = round(cfg.TRAIN.FG_FRACTION * batch_size)

    # Foreground: overlap at least FG_THRESH
    fg_inds = torch.nonzero(best_iou >= cfg.TRAIN.FG_THRESH)[:, 0]
    num_fg = min(fg_quota, fg_inds.numel())
    fg_inds = _subsample(fg_inds, num_fg)

    # Background: overlap in [BG_THRESH_LO, BG_THRESH_HI); fills what is left
    # of the batch.
    is_bg = (best_iou < cfg.TRAIN.BG_THRESH_HI) & (best_iou >= cfg.TRAIN.BG_THRESH_LO)
    bg_inds = torch.nonzero(is_bg)[:, 0]
    num_bg = min(batch_size - num_fg, bg_inds.numel())
    bg_inds = _subsample(bg_inds, num_bg)

    keep = torch.cat((fg_inds, bg_inds))
    proposals = proposals[keep]
    cls_labels = cls_labels[keep]
    pid_labels = pid_labels[keep]

    # Everything after the first num_fg rows is background
    cls_labels[num_fg:] = 0
    pid_labels[num_fg:] = self.bg_pid_label

    matched_gt = gt_boxes[best_gt][keep, :4]
    deltas, inside_weights, outside_weights = self.get_regression_targets(
        proposals[:, 1:5], matched_gt, cls_labels, self.num_classes)

    return (
        proposals,
        cls_labels.long(),
        pid_labels.long(),
        deltas,
        inside_weights,
        outside_weights,
    )
def _merge_proposal_boxes_into_roidb(roidb, box_list): """Add proposal boxes to each roidb entry.""" assert len(box_list) == len(roidb) for i, entry in enumerate(roidb): boxes = box_list[i] num_boxes = boxes.shape[0] gt_overlaps = np.zeros( (num_boxes, entry['gt_overlaps'].shape[1]), dtype=entry['gt_overlaps'].dtype ) box_to_gt_ind_map = -np.ones( (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype ) # Note: unlike in other places, here we intentionally include all gt # rois, even ones marked as crowd. Boxes that overlap with crowds will # be filtered out later (see: _filter_crowd_proposals). gt_inds = np.where(entry['gt_classes'] > 0)[0] if len(gt_inds) > 0: gt_boxes = entry['boxes'][gt_inds, :] gt_classes = entry['gt_classes'][gt_inds] proposal_to_gt_overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False) ) # Gt box that overlaps each input box the most # (ties are broken arbitrarily by class order) argmaxes = proposal_to_gt_overlaps.argmax(axis=1) # Amount of that overlap maxes = proposal_to_gt_overlaps.max(axis=1) # Those boxes with non-zero overlap with gt boxes I = np.where(maxes > 0)[0] # Record max overlaps with the class of the appropriate gt box gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] box_to_gt_ind_map[I] = gt_inds[argmaxes[I]] entry['boxes'] = np.append( entry['boxes'], boxes.astype(entry['boxes'].dtype, copy=False), axis=0 ) entry['gt_classes'] = np.append( entry['gt_classes'], np.zeros((num_boxes), dtype=entry['gt_classes'].dtype) ) entry['seg_areas'] = np.append( entry['seg_areas'], np.zeros((num_boxes), dtype=entry['seg_areas'].dtype) ) entry['gt_overlaps'] = np.append( entry['gt_overlaps'].toarray(), gt_overlaps, axis=0 ) entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) entry['is_crowd'] = np.append( entry['is_crowd'], np.zeros((num_boxes), dtype=entry['is_crowd'].dtype) ) entry['box_to_gt_ind_map'] = np.append( entry['box_to_gt_ind_map'], 
box_to_gt_ind_map.astype( entry['box_to_gt_ind_map'].dtype, copy=False ) )
def evaluate_box_proposals(
    json_dataset, roidb, thresholds=None, area='all', limit=None
):
    """Evaluate detection proposal recall metrics.

    This is a faster alternative to the official COCO API recall evaluation
    code; its results differ slightly from the official ones.

    `json_dataset` is accepted but not used. Proposals are the roidb rows with
    gt_classes == 0; ground truth is every non-crowd row with a positive
    class whose 'seg_areas' value lies in the range selected by `area`.
    Returns a dict with 'ar', 'recalls', 'thresholds', 'gt_overlaps' and
    'num_pos'.
    """
    # Inclusive area bounds (in squared pixels) for every supported name.
    names = ('all', 'small', 'medium', 'large',
             '96-128', '128-256', '256-512', '512-inf')
    bounds = (
        [0**2, 1e5**2],
        [0**2, 32**2],
        [32**2, 96**2],
        [96**2, 1e5**2],
        [96**2, 128**2],
        [128**2, 256**2],
        [256**2, 512**2],
        [512**2, 1e5**2],
    )
    range_by_name = dict(zip(names, bounds))
    assert area in range_by_name, 'Unknown area range: {}'.format(area)
    lo, hi = range_by_name[area]

    collected = [np.zeros(0)]
    num_pos = 0
    for entry in roidb:
        gt_classes = entry['gt_classes']
        gt_inds = np.where((gt_classes > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= lo) & (gt_areas <= hi))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        proposals = entry['boxes'][np.where(gt_classes == 0)[0], :]
        if proposals.shape[0] == 0:
            continue
        if limit is not None and proposals.shape[0] > limit:
            proposals = proposals[:limit, :]

        overlaps = box_utils.bbox_overlaps(
            proposals.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))

        coverage = np.zeros((gt_boxes.shape[0]))
        rounds = min(proposals.shape[0], gt_boxes.shape[0])
        for j in range(rounds):
            # best IoU reached for each gt box, then the best covered gt box
            col_max = overlaps.max(axis=0)
            gt_ind = col_max.argmax()
            gt_ovr = col_max[gt_ind]
            assert gt_ovr >= 0
            # the proposal that gives this gt box its best coverage
            box_ind = overlaps[:, gt_ind].argmax()
            coverage[j] = overlaps[box_ind, gt_ind]
            assert coverage[j] == gt_ovr
            # retire both the proposal and the gt box
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        collected.append(coverage)

    gt_overlaps = np.sort(np.hstack(collected))
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    total = float(num_pos)
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / total
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps, 'num_pos': num_pos}
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary.

    Each foreground roi (``labels_int32 > 0``) is expanded by
    cfg.REFINENET.UP_SCALE, clipped, and paired with the non-crowd
    ground-truth polygon whose enclosing box overlaps the original roi most;
    the polygon is rasterized inside the expanded roi as an M x M target with
    M = cfg.REFINENET.RESOLUTION.

    Args:
        blobs: dict containing 'labels_int32'. Updated in place with
            'refined_mask_rois', 'roi_has_refined_mask_int32',
            'refined_masks_int32' and, when
            cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES is set,
            'loss_weights' (one weight per mask roi).
        sampled_boxes: roi boxes indexed in step with blobs['labels_int32'].
        roidb: one roidb entry; 'gt_classes', 'is_crowd' and 'segms' are read.
        im_scale: factor applied to the roi coordinates before they are stored.
        batch_idx: value placed in the first column of 'refined_mask_rois'.
        data: network input; shape[2] and shape[3] are read as its height and
            width.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    # Only the per-roi weighting branch fills this in; every other path falls
    # back to uniform weights when the blob is written at the end.
    loss_weights = None

    def _rasterize(poly, box):
        # Rasterize the part of the polygon inside `box` as an M x M binary
        # image, flattened to length M**2.
        mask = segm_utils.polys_to_mask_wrt_box(poly, box, M)
        mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
        return np.reshape(mask, M**2)

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h, pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # Several instances overlap this roi: use it for training
                    masks[i, :] = _rasterize(polys_gt[fg_polys_inds[i]], pad_rois_fg[i])
                else:
                    # Only one instance, then set label to be -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]
                masks[i, :] = _rasterize(polys_gt[fg_polys_ind], pad_roi_fg)

                # Determine the weight of this roi: if any *other* instance
                # of the same class reaches into the expanded roi, treat it
                # as a hard sample and give it the larger weight.
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:  # only same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # check whether any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop
        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                masks[i, :] = _rasterize(polys_gt[fg_polys_inds[i]], pad_rois_fg[i])
    else:
        # If there are no fg masks (it does happen), the network still needs a
        # non-empty blob: take the first bg roi, give it an all -1's mask
        # (ignore label) and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi here
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale the rois and format as (batch_idx, x1, y1, x2, y2). astype() makes
    # a new array, so the caller's sampled_boxes is never modified.
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks

    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        if loss_weights is None:
            # No per-roi weighting was computed (no fg rois, or the
            # crowded-only mode took precedence): weight every roi equally.
            loss_weights = blob_utils.ones((pad_rois_fg.shape[0], ))
        blobs['loss_weights'] = loss_weights
def add_refine_global_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                 batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary.

    Unlike the roi-local targets, each foreground roi here receives a target
    covering the whole (down-scaled) input: its best-overlapping non-crowd
    ground-truth polygon is rasterized over the full image at scale
    ``im_scale * cfg.REFINENET.SPATIAL_SCALE`` and written into the top-left
    corner of an ``out_h x out_w`` canvas, which is then flattened.

    Args:
        blobs: dict containing 'labels_int32'. Updated in place with
            'refined_mask_rois', 'roi_has_refined_mask_int32' and
            'refined_masks_int32'.
        sampled_boxes: roi boxes indexed in step with blobs['labels_int32'].
            Left unmodified.
        roidb: one roidb entry; 'gt_classes', 'is_crowd', 'segms', 'height'
            and 'width' are read.
        im_scale: factor applied to the roi coordinates before they are stored.
        batch_idx: value placed in the first column of 'refined_mask_rois'.
        data: network input; shape[2] and shape[3] are read as its height and
            width.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    dst_scale = cfg.REFINENET.SPATIAL_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * dst_scale), int(inp_w * dst_scale)

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], out_h, out_w), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation. Indexing with an index array yields a
        # fresh array, so the in-place scaling below cannot touch sampled_boxes.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # narrow scale and size
        scale = im_scale * dst_scale
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)

        # add fg targets
        for i, fg_polys_ind in enumerate(fg_polys_inds):
            # Rasterize the whole polygon mask at the reduced scale into an
            # im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(polys_gt[fg_polys_ind], im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, 0:im_label_h, 0:im_label_w] = mask

        masks = np.reshape(masks, (-1, out_h * out_w))
    else:
        # If there are no fg masks (it does happen), the network still needs a
        # non-empty blob: take the first bg roi, give it an all -1's mask
        # (ignore label) and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # A single row selected by an integer is a *view*; copy it so that the
        # in-place scaling below does not rescale the caller's sampled_boxes.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, out_h * out_w), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
def forward(self, det_rois, det_labels, det_scores, im_info, dataset_name, roidb=None):
    """Enumerate subject/object roi pairs per image and gather their data.

    Two detections form a candidate pair when they share the same value in
    column 0 of ``det_rois`` and are not the same row. When ``self.overlap``
    is truthy the pair must additionally have a positive
    ``box_utils.bbox_overlaps`` value. A row that had no partner at all
    before the overlap filter is paired with itself so that it still
    contributes one entry.

    Args:
        det_rois: (np.ndarray) detected rois; column 0 is the image index and
            the remaining columns are the box (columns 1:5 are used for FPN
            level mapping).
        det_labels: per-detection labels, indexed in parallel with det_rois.
        det_scores: per-detection scores, indexed in parallel with det_rois.
        im_info: unused; kept for interface compatibility.
        dataset_name: unused; kept for interface compatibility.
        roidb: unused; kept for interface compatibility.

    Returns:
        dict with ``det_rois``, ``sbj_inds``, ``obj_inds``, ``sbj_rois``,
        ``obj_rois``, ``rel_rois``, ``sbj_labels``, ``obj_labels``,
        ``sbj_scores``, ``obj_scores`` and ``fg_size`` (number of pairs as a
        1-element int32 array). With multilevel FPN rois enabled,
        ``fpn_utils.add_multilevel_roi_blobs`` is called on the dict for
        ``det_rois`` and ``rel_rois``.
    """
    # Get pairwise proposals first
    im_inds = det_rois[:, 0]
    is_cand = im_inds[:, None] == im_inds[None, :]
    # remove self paired rois
    is_cand.reshape(-1)[diagonal_inds(is_cand)] = False
    # Rows with no partner at all (recorded before any overlap filtering).
    is_empty = np.where(is_cand.any(1) == 0)[0]
    if self.overlap:
        is_cand = is_cand & (box_utils.bbox_overlaps(
            det_rois[:, 1:], det_rois[:, 1:]) > 0)
    # NOTE(review): nesting reconstructed from flattened source -- confirm
    # this guard is meant to apply regardless of self.overlap.
    if is_empty.size > 0:
        is_cand[is_empty, is_empty] = True
    sbj_inds, obj_inds = np.where(is_cand)

    sbj_rois = det_rois[sbj_inds]
    obj_rois = det_rois[obj_inds]

    return_dict = {}
    sbj_labels = det_labels[sbj_inds]
    obj_labels = det_labels[obj_inds]
    sbj_scores = det_scores[sbj_inds]
    obj_scores = det_scores[obj_inds]
    rel_rois = box_utils_rel.rois_union(sbj_rois, obj_rois)
    return_dict['det_rois'] = det_rois
    return_dict['sbj_inds'] = sbj_inds
    return_dict['obj_inds'] = obj_inds
    return_dict['sbj_rois'] = sbj_rois
    return_dict['obj_rois'] = obj_rois
    return_dict['rel_rois'] = rel_rois
    return_dict['sbj_labels'] = sbj_labels
    return_dict['obj_labels'] = obj_labels
    return_dict['sbj_scores'] = sbj_scores
    return_dict['obj_scores'] = obj_scores
    return_dict['fg_size'] = np.array([sbj_rois.shape[0]], dtype=np.int32)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        lvl_min = cfg.FPN.ROI_MIN_LEVEL
        lvl_max = cfg.FPN.ROI_MAX_LEVEL
        # when use min_rel_area, the same sbj/obj area could be mapped to
        # different feature levels when they are associated with different
        # relationships. Thus we cannot get det_rois features then gather
        # sbj/obj features. The only way is gather sbj/obj per relationship,
        # thus need to return sbj_rois/obj_rois
        rois_blob_names = ['det_rois', 'rel_rois']
        for rois_blob_name in rois_blob_names:
            # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
            target_lvls = fpn_utils.map_rois_to_fpn_levels(
                return_dict[rois_blob_name][:, 1:5], lvl_min, lvl_max)
            fpn_utils.add_multilevel_roi_blobs(
                return_dict, rois_blob_name, return_dict[rois_blob_name],
                target_lvls, lvl_min, lvl_max)

    return return_dict
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Each roi with a positive (kept) label is matched to the gt polygon whose
    enclosing box overlaps it most, and that polygon is rasterized to an
    ``M x M`` target with ``segm_utils.polys_to_mask_wrt_box``, where
    ``M = cfg.MRCNN.RESOLUTION``.

    Args:
        blobs: dict that must contain ``'labels_int32'``. It is updated in
            place with ``'mask_rois'``, ``'roi_has_mask_int32'`` and
            ``'masks_int32'``.
        sampled_boxes: roi boxes indexed in parallel with
            ``blobs['labels_int32']`` (presumably (R, 4) boxes in original
            image coordinates -- TODO confirm). Not modified by this function.
        roidb: single-image entry providing ``'gt_classes'``, ``'is_crowd'``
            and ``'segms'``.
        im_scale: factor the rois are multiplied by before being stored.
        batch_idx: value written into the first column of the stored rois.

    Returns:
        None. Results are stored in ``blobs``.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)

    # Keep only a subset of classes (set A in the paper) for mask training
    if cfg.TRAIN.MRCNN_FILTER_LABELS:
        keep_label_set = set(cfg.TRAIN.MRCNN_LABELS_TO_KEEP)
        labels_int32 = blobs['labels_int32']
        labels_int32_keep = np.array(
            [(label if label in keep_label_set else 0)
             for label in labels_int32],
            dtype=labels_int32.dtype)
    else:
        labels_int32_keep = blobs['labels_int32']

    fg_inds = np.where(labels_int32_keep > 0)[0]
    roi_has_mask = labels_int32_keep.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            poly_gt = polys_gt[fg_polys_inds[i]]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, rois_fg[i], M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        # Integer indexing followed by reshape yields a *view* of
        # sampled_boxes; copy it so the in-place scaling below does not
        # silently rescale the caller's array.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        # NOTE(review): this flags index 0 while the roi above is
        # bg_inds[0]; with label filtering enabled the two may differ --
        # confirm.
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def _compute_pairwise_iou(a, b):
    """Return the overlap of every box in ``a`` with every box in ``b``.

    a, b (np.ndarray) of shape Nx4T and Mx4T.
    The result, as computed by ``box_utils.bbox_overlaps``, is NxM: one entry
    for each combination of boxes.
    """
    pairwise_overlaps = box_utils.bbox_overlaps(a, b)
    return pairwise_overlaps
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Build RPN labels and bbox regression targets for one image.

    Args:
        im_height: image height, used for the straddle test on anchors.
        im_width: image width, used for the straddle test on anchors.
        foas: iterable of "field of anchors" objects exposing ``field_size``
            and ``num_cell_anchors``; ``all_anchors`` is consumed in this
            order, ``field_size**2 * num_cell_anchors`` rows at a time.
        all_anchors: array of anchors, one row per anchor with columns
            (x1, y1, x2, y2).
        gt_boxes: ground-truth boxes; may be empty, in which case no anchor
            is labelled positive.

    Returns:
        A dict with keys ``rpn_labels_int32_wide``, ``rpn_bbox_targets_wide``,
        ``rpn_bbox_inside_weights_wide`` and ``rpn_bbox_outside_weights_wide``
        when ``foas`` has exactly one element, otherwise a list of such
        dicts, one per element of ``foas``.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
    else:
        # No ground truth: every anchor has zero overlap with "its" gt box.
        # Defining these keeps the sampling code below from hitting a
        # NameError and lets all anchors compete as background.
        anchor_to_gt_argmax = np.zeros((num_inside, ), dtype=np.int64)
        anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
    else:
        # Fewer candidates than the budget: use all of them rather than
        # leaving the image without any negative example.
        enable_inds = bg_inds
    labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if len(fg_inds) > 0:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
        )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
def rel_assignments(im_inds, rpn_rois, roi_gtlabels, roidb, im_info, num_sample_per_gt=4, filter_non_overlap=True):
    """Sample foreground and background relationship labels between rois.

    For every image, each ground-truth relationship is expanded to all
    (subject roi, object roi) pairs whose rois match the gt subject/object
    (same label and overlap >= ``cfg.TRAIN.FG_THRESH``). Up to
    ``num_sample_per_gt`` of those pairs are drawn with probability
    proportional to the product of the two overlaps. Remaining roi pairs are
    sampled uniformly as background (relation type 0).

    :param im_inds: image index of each roi; images are numbered
        0..im_inds.max()
    :param rpn_rois: rois as [img_ind, x1, y1, x2, y2]
    :param roi_gtlabels: label assigned to each roi; rois labelled 0 never
        take part in background pairs
    :param roidb: per-image entries providing 'boxes', 'gt_classes',
        'sbj_gt_boxes', 'obj_gt_boxes' and 'prd_gt_classes'. Not modified.
    :param im_info: indexable as im_info[i, 2], the factor applied to
        image i's gt boxes before they are compared with the rois
    :param num_sample_per_gt: maximum number of roi pairs sampled per gt
        relationship
    :param filter_non_overlap: if True, background pairs must have an
        overlap strictly between 0 and 1; otherwise any pair of distinct
        rois qualifies
    :return:
        rel_labels[:, :-1]: [num_rels, 3] (img ind, box0 ind, box1 ind)
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type)
        Box indices are offset by the number of rois in preceding images
        (this assumes rois are stored grouped by image in increasing order --
        TODO confirm against caller).
    """
    fg_rels_per_image = int(
        np.round(cfg.TRAIN.FG_REL_FRACTION * cfg.TRAIN.RELS_PER_IMG_REFINE))

    num_im = int(im_inds.max() + 1)
    indices_sets = [np.where(im_inds == i)[0] for i in range(num_im)]

    rel_labels = []
    num_box_seen = 0
    # The image index gets its own name: the per-relationship loop below must
    # not overwrite it, since it is written into column 0 of the output.
    for im_ind, indices in enumerate(indices_sets):
        gt_boxes_i = roidb[im_ind]['boxes']
        gt_rois_i = gt_boxes_i * im_info[im_ind, 2]
        gt_classes_i = roidb[im_ind]['gt_classes']
        sbj_gt_boxes_i = roidb[im_ind]['sbj_gt_boxes']
        obj_gt_boxes_i = roidb[im_ind]['obj_gt_boxes']
        prd_gt_classes_i = roidb[im_ind]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            # Out-of-place add: an in-place `+= 1` would shift the labels
            # stored in roidb again on every call.
            prd_gt_classes_i = prd_gt_classes_i + 1
        sbj_gt_inds_i = box_utils.bbox_overlaps(
            sbj_gt_boxes_i, gt_boxes_i).argmax(-1)
        obj_gt_inds_i = box_utils.bbox_overlaps(
            obj_gt_boxes_i, gt_boxes_i).argmax(-1)
        gt_rels_i = np.stack(
            (sbj_gt_inds_i, obj_gt_inds_i, prd_gt_classes_i), -1)

        # [num_pred, num_gt]
        pred_rois_i = rpn_rois[indices, 1:]
        pred_roilabels_i = roi_gtlabels[indices]

        ious = box_utils.bbox_overlaps(pred_rois_i, gt_rois_i)
        is_match = (pred_roilabels_i[:, None] == gt_classes_i[None]) & (
            ious >= cfg.TRAIN.FG_THRESH)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the
        # exact same box
        if filter_non_overlap:
            pbi_iou = box_utils.bbox_overlaps(pred_rois_i, pred_rois_i)
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
        else:
            num_rois_i = pred_rois_i.shape[0]
            rel_possibilities = np.ones(
                (num_rois_i, num_rois_i), dtype=np.int64
            ) - np.eye(num_rois_i, dtype=np.int64)

        # ONLY select relations between ground truth because otherwise we get
        # useless data
        rel_possibilities[pred_roilabels_i == 0] = 0
        rel_possibilities[:, pred_roilabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        for from_gtind, to_gtind, rel_id in gt_rels_i:
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append((from_ind, to_ind, rel_id))
                        fg_scores_i.append(
                            ious[from_ind, from_gtind] * ious[to_ind, to_gtind])
                        # A foreground pair can no longer be background.
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(
                    p.shape[0], p=p, size=num_to_add, replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        fg_rels = np.array(fg_rels, dtype=np.int64)
        if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
            fg_rels = fg_rels[npr.choice(
                fg_rels.shape[0], size=fg_rels_per_image, replace=False)]
        elif fg_rels.size == 0:
            fg_rels = np.zeros((0, 3), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack(
            (bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

        num_bg_rel = min(cfg.TRAIN.RELS_PER_IMG_REFINE - fg_rels.shape[0],
                         bg_rels.shape[0])
        if bg_rels.size > 0:
            # Uniform sampling without replacement.
            bg_rels = bg_rels[np.random.choice(
                bg_rels.shape[0], size=num_bg_rel, replace=False)]
        else:
            bg_rels = np.zeros((0, 3), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here
            bg_rels = np.array([[0, 0, 0]], dtype=np.int64)

        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        all_rels_i[:, 0:2] += num_box_seen

        # Sort by subject index, then by object index.
        all_rels_i = all_rels_i[
            np.lexsort((all_rels_i[:, 1], all_rels_i[:, 0]))]

        rel_labels.append(
            np.column_stack((
                im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
                all_rels_i,
            )))

        num_box_seen += pred_rois_i.shape[0]

    rel_labels = np.concatenate(rel_labels, 0)
    return rel_labels[:, :-1], rel_labels