def test_bbox_dataset_to_prediction_roundtrip(self):
    """Round-trip check: a dataset-format (x1, y1, w, h) ground-truth box is
    converted to xyxy, encoded as deltas against nearby proposals, decoded
    back to xyxy predictions, converted to xywh, and finally compared with
    the gt box via the COCO IoU routine — every IoU must be 1.
    """
    weights = (5, 5, 10, 10)
    # "Read" a ground-truth box in the dataset's (x1, y1, w, h) format.
    gt_xywh = [10, 20, 100, 150]
    # Convert to the internal (x1, y1, x2, y2) representation.
    gt_xyxy = box_utils.xywh_to_xyxy(gt_xywh)
    # Generate proposal boxes near the gt box.
    proposals = random_boxes(gt_xyxy, 10, 10)
    # Deltas that map each proposal onto the gt box...
    deltas = box_utils.bbox_transform_inv(
        proposals, np.array([gt_xyxy]), weights=weights
    )
    # ...and apply them to recover xyxy predictions.
    preds_xyxy = box_utils.bbox_transform(proposals, deltas, weights=weights)
    # Back to the dataset's xywh format for the COCO API.
    preds_xywh = box_utils.xyxy_to_xywh(preds_xyxy)
    # COCO IoU between each prediction and the gt box must be ~1.
    iscrowd = [int(False)] * preds_xywh.shape[0]
    ious = COCOmask.iou(preds_xywh, np.array([gt_xywh]), iscrowd)
    np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
def test_bbox_transform_and_inverse(self):
    """bbox_transform must invert bbox_transform_inv: encoding dst boxes
    relative to src boxes and then decoding must reproduce the dst boxes.
    """
    weights = (5, 5, 10, 10)
    src = random_boxes([10, 10, 20, 20], 1, 10)
    dst = random_boxes([10, 10, 20, 20], 1, 10)
    # Encode dst relative to src, then decode back again.
    encoded = box_utils.bbox_transform_inv(src, dst, weights=weights)
    decoded = box_utils.bbox_transform(src, encoded, weights=weights)
    np.testing.assert_array_almost_equal(dst, decoded, decimal=5)
def postprocess_output(rois, scaling_factor, im_size, class_scores, bbox_deltas,
                       bbox_reg_weights=(10.0, 10.0, 5.0, 5.0)):
    """Decode raw network outputs into final scored detections.

    Scales the ROIs back to image space, applies the predicted regression
    deltas, clips to the image boundary, then thresholds and runs per-class
    NMS via ``box_results_with_nms_and_limit``.

    Returns:
        (scores_final, boxes_final, boxes_per_class) as produced by
        box_results_with_nms_and_limit.
    """
    boxes = to_np(rois.div(scaling_factor).squeeze(0))
    deltas = to_np(bbox_deltas)
    orig_im_size = to_np(im_size).squeeze()
    # Decode deltas into absolute boxes, then clip to the image bounds.
    pred_boxes = box_utils.bbox_transform(boxes, deltas, bbox_reg_weights)
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, orig_im_size)
    scores = to_np(class_scores)
    # NOTE(review): mapping scores/boxes back through the dedup inverse index
    # ('proposal_inv_index') is intentionally not done here — confirm callers
    # do not rely on the re-duplicated ordering.
    # Threshold on score and apply per-class NMS to remove duplicates.
    scores_final, boxes_final, boxes_per_class = box_results_with_nms_and_limit(
        scores, pred_boxes)
    return (scores_final, boxes_final, boxes_per_class)
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Run the model on one image and return scores and decoded boxes.

    Arguments:
        model: the detection model (a PyTorch module returning a dict)
        im (ndarray): color image to test (in BGR order)
        target_scale / target_max_size: resize parameters for _get_blobs
        boxes (ndarray): R x 4 proposals in [x1, y1, x2, y2], or None for RPN

    Returns:
        scores (ndarray): R x K class scores (class 0 is background)
        pred_boxes (ndarray): R x 4*K predicted boxes
        im_scale: scale factor used for the input blob
        blob_conv: the backbone feature blob from the forward pass
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Deduplicate ROIs that alias to the same feature-map location.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally denormalize targets by a precomputed
            # mean and stdev.
            # BUG FIX: box_deltas is a NumPy array at this point, so the
            # original `box_deltas.view(-1, 4)` (a torch.Tensor method;
            # ndarray.view takes a dtype, not a shape) raised a TypeError.
            # Use reshape instead, and restore the original (R, 4K) shape
            # so class-specific regression still decodes correctly.
            box_deltas = (
                box_deltas.reshape(-1, 4) * cfg.TRAIN.BBOX_NORMALIZE_STDS
                + cfg.TRAIN.BBOX_NORMALIZE_MEANS
            ).reshape(box_deltas.shape)
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Tile the agnostic box once per class so downstream code sees 4K cols.
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image.

    Runs the Caffe2 net, decodes per-FPN-level class probabilities and box
    regressions into scored boxes, applies per-class NMS, and returns the
    detections grouped by class.

    Arguments:
        model: Caffe2 detection model wrapper (model.net is run via workspace)
        im (ndarray): input image (passed to _get_image_blob and used for clipping)
        anchors: dict mapping FPN level -> cell anchor array for that level

    Returns:
        cls_boxes: list of length NUM_CLASSES; entry c is an (N_c, 5) array of
        [x1, y1, x2, y2, score] rows for class c (entry 0 stays empty).
    """
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    # A = number of anchors per spatial location.
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    # Collect the scoped blob names for each level's outputs.
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability; reshape to
        # (N, A, num_classes, H, W) and (N, A, 4, H, W) respectively.
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # Drop the background channel when softmax scoring is used.
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        # Keep only the top pre_nms_topn candidates (argpartition avoids a
        # full sort of the raveled score array).
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        # Recover (batch, anchor, class, y, x) coordinates for each candidate.
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Shift each cell anchor to its spatial location on the input image.
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            # Class-specific regression: pick the 4 channels of the
            # candidate's class for each (y, x) location.
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        # Undo the input scaling and clip to the original image.
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            # `classes` is 0-based (background removed), hence cls - 1.
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4]  - scores
    #   detections[:, 5]  - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    return cls_boxes
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
    """Convert RPN conv outputs for one image into scored box proposals.

    Arguments:
        im_info: per-image (height, width, scale) info; [:2] is used for clipping
        all_anchors: (H * W * A, 4) anchors enumerated over the feature map
        bbox_deltas: (4 * A, H, W) predicted box regressions from the conv head
        scores: (A, H, W) predicted objectness scores

    Returns:
        (proposals, scores) after top-K selection, decoding, clipping,
        size/area filtering, and NMS.
    """
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self.training else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    min_area = cfg[cfg_key].RPN_MIN_AREA
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   - bbox deltas will be (4 * A, H, W) format from conv output
    #   - transpose to (H, W, 4 * A)
    #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
    #     in slowest to fastest order to match the enumerated anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))
    # Same story for the scores:
    #   - scores are (A, H, W) format from conv output
    #   - transpose to (H, W, A)
    #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #     to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]
    # Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))
    # 2. clip proposals to image (may result in proposals with zero area
    #    that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])
    # 3. remove predicted boxes with either height or width < min_size
    keep = _filter_boxes(proposals, min_size, im_info)
    proposals = proposals[keep, :]
    scores = scores[keep]
    if min_area > 0:
        # Also drop proposals whose area falls below min_area.
        keep = _filter_areas(proposals, min_area, im_info)
        proposals = proposals[keep, :]
        scores = scores[keep]
    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores
def im_detect_bbox_batch(model, ims, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (ndarray): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores list((ndarray)): [R x K] array of object class scores for K
            classes (K includes background as object category 0)
        boxes list((ndarray)): [R x 4*K] array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(ims, boxes)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)
    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # Softmax class probabilities
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = workspace.FetchBlob(
                core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        scores_batch = []
        pred_boxes_batch = []
        for i in range(cfg.TEST.IMS_PER_BATCH):
            # select batch: rois[:, 0] holds the batch index of each roi.
            # NOTE(review): np.where returns a 1-tuple here, so the fancy
            # indexing below yields a (1, K, …) array — the reshape right
            # after flattens it back to 2-D.
            select_inds = np.where(rois[:, 0] == i)
            # unscale back to raw image space
            boxes = rois[select_inds, 1:5] / im_scales[i]
            boxes = boxes.reshape([-1, boxes.shape[-1]])
            scores_i = scores[select_inds, :]
            scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
            scores_batch.append(scores_i)
            if cfg.TEST.BBOX_REG:
                # In case there is 1 proposal
                box_deltas_i = box_deltas[select_inds, :]
                box_deltas_i = box_deltas_i.reshape(
                    [-1, box_deltas_i.shape[-1]])
                if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    # Remove predictions for bg class (compat with MSRA code)
                    box_deltas_i = box_deltas_i[:, -4:]
                pred_boxes = box_utils.bbox_transform(
                    boxes, box_deltas_i, cfg.MODEL.BBOX_REG_WEIGHTS)
                pred_boxes = box_utils.clip_tiled_boxes(
                    pred_boxes, ims[i].shape)
                if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                    pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
                pred_boxes_batch.append(pred_boxes)
    else:
        logger.error('Not implemented.')
        return None, None, None
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        logger.error('Not implemented.')
        return None, None, None
    return scores_batch, pred_boxes_batch, im_scales
def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
    """Forward pass of the Generalized R-CNN model.

    Runs backbone + RPN, then (unless RPN_ONLY) the box head; in training
    mode it computes RPN / box / mask / keypoint losses, in test mode it
    returns rois, class scores and box regressions. Optional debug paths
    (cfg.RPN.VIS_QUANT_TARGET, cfg.TEST.PROPOSALS_OUT) dump visualizations
    and JSON files to hard-coded /nfs paths.
    """
    im_data = data
    if self.training:
        # roidb arrives serialized per image; deserialize each entry.
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))
    device_id = im_data.get_device()
    return_dict = {}  # A dict to collect return variables
    blob_conv = self.Conv_Body(im_data)
    rpn_ret = self.RPN(blob_conv, im_info, roidb)
    # if self.training:
    #     # can be used to infer fg/bg ratio
    #     return_dict['rois_label'] = rpn_ret['labels_int32']
    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv` may
        # include extra blobs that are used for RPN proposals, but not for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]
    if not self.training:
        return_dict['blob_conv'] = blob_conv
    if not cfg.MODEL.RPN_ONLY:
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret)
        cls_score, bbox_pred = self.Box_Outs(box_feat)
    else:
        # TODO: complete the returns for RPN only situation
        pass
    if self.training:
        return_dict['losses'] = {}
        return_dict['metrics'] = {}
        # rpn loss
        rpn_kwargs.update(dict(
            (k, rpn_ret[k]) for k in rpn_ret.keys()
            if (k.startswith('rpn_cls_logits') or k.startswith('rpn_bbox_pred'))))
        loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(**rpn_kwargs)
        if cfg.RPN.VIS_QUANT_TARGET:
            # Debug visualization: draw quantized anchors (red), the anchors
            # shifted by their regression targets (green) and gt boxes (white)
            # on the input image and save the figure to disk.
            import numpy as np
            import json
            import os
            import time
            gt_boxes = []
            gt_label = roidb[0]['gt_classes']
            for inds, item in enumerate(gt_label):
                if item != 0:
                    gt_boxes.append(roidb[0]['boxes'][inds])
            gt_boxes = np.array(gt_boxes, dtype=np.float32)
            # Scale gt boxes to the network input resolution.
            gt_boxes *= im_info.detach().numpy()[:, 2]
            path = "/nfs/project/libo_i/Boosting/Targets_Info"
            if not os.path.exists(path):
                os.makedirs(path)
            b, c, h, w = rpn_kwargs['rpn_cls_logits_fpn3'].shape
            sample_targets = rpn_kwargs[
                'rpn_bbox_targets_wide_fpn3'][:, :, :h, :w]
            line_targets = sample_targets.detach().data.cpu().numpy()
            with open(os.path.join(path, "quant_anchors.json"), "r") as fp:
                quant_anchors = np.array(json.load(fp), dtype=np.float32)
            quant_anchors = quant_anchors[:h, :w]
            line_targets = line_targets[:, 4:8, :, :].transpose(
                (0, 2, 3, 1)).reshape(quant_anchors.shape)
            line_targets = line_targets.reshape(-1, 4)
            width = im_data.shape[3]
            height = im_data.shape[2]
            # Apply the regression targets to the quantized anchors here.
            line_quant_anchors = quant_anchors.reshape(-1, 4)
            pred_boxes = box_utils.onedim_bbox_transform(
                line_quant_anchors, line_targets)
            pred_boxes = box_utils.clip_tiled_boxes(
                pred_boxes, (height, width, 3))
            im = im_data.detach().cpu().numpy().reshape(
                3, height, width).transpose((1, 2, 0))
            # Undo the pixel-mean normalization for display.
            means = np.squeeze(cfg.PIXEL_MEANS)
            for i in range(3):
                im[:, :, i] += means[i]
            im = im.astype(int)
            dpi = 200
            fig = plt.figure(frameon=False)
            fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.axis('off')
            fig.add_axes(ax)
            ax.imshow(im[:, :, ::-1])
            # Draw the ground-truth boxes (white) on the image.
            for item in gt_boxes:
                ax.add_patch(
                    plt.Rectangle((item[0], item[1]),
                                  item[2] - item[0],
                                  item[3] - item[1],
                                  fill=False, edgecolor='white',
                                  linewidth=1, alpha=1))
            cnt = 0
            for inds, before_item in enumerate(line_quant_anchors):
                after_item = pred_boxes[inds]
                targets_i = line_targets[inds]
                # Skip anchors with no regression target assigned.
                if np.sum(targets_i) == 0:
                    continue
                ax.add_patch(
                    plt.Rectangle((before_item[0], before_item[1]),
                                  before_item[2] - before_item[0],
                                  before_item[3] - before_item[1],
                                  fill=False, edgecolor='r',
                                  linewidth=1, alpha=1))
                ax.add_patch(
                    plt.Rectangle((after_item[0], after_item[1]),
                                  after_item[2] - after_item[0],
                                  after_item[3] - after_item[1],
                                  fill=False, edgecolor='g',
                                  linewidth=1, alpha=1))
                logger.info("valid boxes: {}".format(cnt))
                cnt += 1
            if cnt != 0:
                ticks = time.time()
                fig.savefig(
                    "/nfs/project/libo_i/Boosting/Targets_Info/{}.png".
                    format(ticks), dpi=dpi)
                # NOTE(review): closing only when cnt != 0 may leak figures
                # on the cnt == 0 path — confirm intended placement.
                plt.close('all')
        if cfg.FPN.FPN_ON:
            for i, lvl in enumerate(
                    range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)):
                return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] = loss_rpn_cls[i]
                return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] = loss_rpn_bbox[i]
        else:
            return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
            return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox
        # bbox loss
        loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
            cls_score, bbox_pred, rpn_ret['labels_int32'],
            rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
            rpn_ret['bbox_outside_weights'])
        if cfg.RPN.ZEROLOSS:
            # Replace the box-head losses with zero (but differentiable)
            # placeholders so only the RPN trains.
            zero_loss_bbox = torch.Tensor([0.]).squeeze().cuda()
            zero_loss_bbox.requires_grad = True
            zero_loss_cls = torch.Tensor([0.]).squeeze().cuda()
            zero_loss_cls.requires_grad = True
            return_dict['losses']['loss_bbox'] = zero_loss_bbox
            return_dict['losses']['loss_cls'] = zero_loss_cls
        else:
            return_dict['losses']['loss_bbox'] = loss_bbox
            return_dict['losses']['loss_cls'] = loss_cls
        return_dict['metrics']['accuracy_cls'] = accuracy_cls
        if cfg.MODEL.MASK_ON:
            if getattr(self.Mask_Head, 'SHARE_RES5', False):
                mask_feat = self.Mask_Head(
                    res5_feat, rpn_ret,
                    roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
            else:
                mask_feat = self.Mask_Head(blob_conv, rpn_ret)
            mask_pred = self.Mask_Outs(mask_feat)
            # return_dict['mask_pred'] = mask_pred
            # mask loss
            loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                mask_pred, rpn_ret['masks_int32'])
            return_dict['losses']['loss_mask'] = loss_mask
        if cfg.MODEL.KEYPOINTS_ON:
            if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                # No corresponding keypoint head implemented yet (Neither in Detectron)
                # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                kps_feat = self.Keypoint_Head(
                    res5_feat, rpn_ret,
                    roi_has_keypoints_int32=rpn_ret['roi_has_keypoint_int32'])
            else:
                kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
            kps_pred = self.Keypoint_Outs(kps_feat)
            # return_dict['keypoints_pred'] = kps_pred
            # keypoints loss
            if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'])
            else:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'],
                    rpn_ret['keypoint_loss_normalizer'])
            return_dict['losses']['loss_kps'] = loss_keypoints
        # pytorch0.4 bug on gathering scalar(0-dim) tensors
        for k, v in return_dict['losses'].items():
            return_dict['losses'][k] = v.unsqueeze(0)
        for k, v in return_dict['metrics'].items():
            return_dict['metrics'][k] = v.unsqueeze(0)
    else:
        # Testing
        return_dict['rois'] = rpn_ret['rois']
        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
        if cfg.TEST.PROPOSALS_OUT:
            import os
            import json
            import numpy as np
            # Apply the box transform directly here and dump the transformed
            # boxes (aligned with their source proposals) as JSON.
            bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
            box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
            shift_boxes = box_utils.bbox_transform(
                rpn_ret['rois'][:, 1:5], box_deltas,
                cfg.MODEL.BBOX_REG_WEIGHTS)
            shift_boxes = box_utils.clip_tiled_boxes(
                shift_boxes, im_info.data.cpu().numpy().squeeze()[0:2])
            num_classes = cfg.MODEL.NUM_CLASSES
            onecls_pred_boxes = []
            inds_all = []
            for j in range(1, num_classes):
                # Keep boxes whose class-j score clears the test threshold.
                inds = np.where(cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                onecls_pred_boxes += boxes_j.tolist()
                inds_all.extend(inds.tolist())
            # NOTE(review): np.int is removed in NumPy >= 1.20 — consider
            # np.int64 (or plain int) here.
            inds_all = np.array(inds_all, dtype=np.int)
            aligned_proposals = rpn_ret['rois'][:, 1:5][inds_all]
            aligned_boxes = np.array(onecls_pred_boxes, dtype=np.float32)
            assert inds_all.shape[0] == aligned_boxes.shape[0]
            assert aligned_proposals.size == aligned_boxes.size
            path = "/nfs/project/libo_i/Boosting/Anchor_Info"
            with open(os.path.join(path, "proposals.json"), "w") as fp:
                json.dump(aligned_proposals.tolist(), fp)
            with open(os.path.join(path, "boxes.json"), "w") as fp:
                json.dump(aligned_boxes.tolist(), fp)
    return return_dict
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)
    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Tile the single agnostic box once per class for downstream code.
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    return scores, pred_boxes, im_scale
def proposals_for_one_image(
        self, im_info, all_anchors, bbox_deltas, scores, frames_per_vid):
    """Convert RPN outputs for one video clip into scored tube proposals.

    Each proposal is a "tube": 4 box coordinates per frame, so rows are
    4 * frames_per_vid wide. Scoring, decoding, clipping, per-frame size
    filtering, and NMS mirror the single-image RPN path.
    """
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
    #     conv output
    #   - transpose to (H, W, 4 * A * frames_per_vid)
    #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
    #     by (H, W, A) in slowest to fastest order to match the enumerated
    #     anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((
        -1, 4 * frames_per_vid))
    # Same story for the scores:
    #   - scores are (A, H, W) format from conv output
    #     This computes the score for the tube
    #   - transpose to (H, W, A)
    #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #     to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(
            -scores.squeeze(), pre_nms_topN
        )[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]
    # 1. Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))
    # 2. clip proposals to image (may result in proposals with zero area
    #    that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])
    # 3. remove predicted boxes with either height or width < min_size
    #    again, needs to be done one each frame and need to "AND" over frames
    keep = np.arange(proposals.shape[0])
    for frame_id in range(frames_per_vid):
        keep = np.intersect1d(
            keep, _filter_boxes(
                proposals[:, frame_id * 4: (frame_id + 1) * 4],
                min_size, im_info))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        # When we're training on multiple GPUs, running NMS on the GPU
        # causes serious perf issues. We need to debug, but for now force
        # running on the CPU when training
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError('Can not handle tubes, need to extend dedup')
        # NOTE(review): everything below the raise in this branch is
        # unreachable — kept as a template for a future dedup extension.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)
    # dump workspace blobs (debugging)
    # if 0:
    #     from utils.io import robust_pickle_dump
    #     import os, sys
    #     saved_blobs = {}
    #     ws_blobs = workspace.Blobs()
    #     for dst_name in ws_blobs:
    #         ws_blob = workspace.FetchBlob(dst_name)
    #         saved_blobs[dst_name] = ws_blob
    #     det_file = os.path.join('/tmp/output/data_dump_inflT1.pkl')
    #     robust_pickle_dump(saved_blobs, det_file)
    #     logger.info("DUMPED BLOBS")
    #     sys.exit(0)
    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:] / im_scales[0]
    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they were
        # trained as linear SVMs
        scores = workspace.FetchBlob(core.ScopedName('cls_score')).squeeze()
    else:
        # use softmax estimated probabilities
        scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])
    # Each "box" is a tube: 4 coordinates per frame.
    time_dim = boxes.shape[-1] // 4
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code);
            # keep the last 4 * time_dim columns (one box per frame).
            box_deltas = box_deltas[:, -4 * time_dim:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    return scores, pred_boxes, im_scales
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores,
                            frames_per_vid):
    """Turn per-anchor RPN outputs for one image/clip into scored proposals.

    Applies the predicted deltas to the anchors, clips, filters by minimum
    size (ANDed across all frames of the tube), and runs NMS.

    Returns:
        proposals (ndarray): N x (4 * frames_per_vid) proposal boxes
        scores (ndarray): N x 1 objectness scores, sorted descending
    """
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
    #     conv output
    #   - transpose to (H, W, 4 * A * frames_per_vid)
    #   - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
    #     by (H, W, A) in slowest to fastest order to match the enumerated
    #     anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape(
        (-1, 4 * frames_per_vid))
    # Same story for the scores:
    #   - scores are (A, H, W) format from conv output
    #     This computes the score for the tube
    #   - transpose to (H, W, A)
    #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #     to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(-scores.squeeze(),
                               pre_nms_topN)[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # 1. Transform anchors into proposals via bbox transformations
    # (unit weights: RPN deltas are unweighted)
    proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                         (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    # that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < min_size
    # again, needs to be done one each frame and need to "AND" over frames
    keep = np.arange(proposals.shape[0])
    for frame_id in range(frames_per_vid):
        keep = np.intersect1d(
            keep,
            _filter_boxes(proposals[:, frame_id * 4:(frame_id + 1) * 4],
                          min_size, im_info))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        # When we're training on multiple GPUs, running NMS on the GPU
        # causes serious perf issues. We need to debug, but for now force
        # running on the CPU when training
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores
def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    # Feed inputs into the (global) Caffe2 workspace and run the net.
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space (column 0 is the batch index)
        boxes = rois[:, 1:5] / im_scales[0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        #for b in workspace.Blobs():
        #print(b)
        # NOTE(review): this variant reads the VOC-specific prediction blob
        # 'bbox_pred_voc' rather than the usual 'bbox_pred'.
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred_voc')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Tile the single regression back out to one copy per class.
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Prepare the bbox for testing.

    Runs `model` on one image and decodes class scores and class-tiled
    predicted boxes.

    Arguments:
        model: detection model (callable; returns a dict of output tensors)
        im (ndarray): color image to test (BGR order)
        target_scale (int): target size of the shorter image side
        target_max_size (int): cap on the longer image side
        boxes (ndarray or None): R x 4 proposals in [x1, y1, x2, y2] format,
            or None when proposals come from the RPN

    Returns:
        scores (ndarray): R x K class scores (class 0 is background)
        pred_boxes (ndarray): R x 4*K predicted boxes
        im_scale (float): scale used for the input blob
        blob_conv: backbone feature blob(s) for downstream heads
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # Identify duplicate feature ROIs so features are only computed on the
    # unique subset (only relevant for precomputed-proposal models).
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [Variable(torch.from_numpy(inputs['data']),
                                   volatile=True)]
        inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']),
                                      volatile=True)]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space (column 0 is the batch index)
        boxes = rois[:, 1:5] / im_scale

    # cls prob (activations after softmax)
    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean
            # and stdev.
            # BUG FIX: box_deltas is a numpy array here; ndarray.view()
            # takes a dtype (a reinterpret-cast), not a shape, so the
            # torch-style .view(-1, 4) raised TypeError. Use reshape.
            box_deltas = box_deltas.reshape(-1, 4) \
                * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Tile the single regression back out to one copy per class.
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
def forward(self, scores, gt_boxes, img_info):
    """
    Args:
        scores (Tensor[1, num_anchors * num_classes, H, W]): Classification scores.
        gt_boxes (Tensor[N, 6]): Ground-truth boxes in
            (x1, y1, x2, y2, class, person_id) format
        img_info (Tensor[3]): (height, width, scale)

    Returns:
        labels (Tensor): Ground-truth labels of the anchors.
        deltas (Tensor): Ground-truth regression deltas of the anchors.
        inside_weights, outside_weights (Tensor): Used to calculate
            smooth_l1_loss
    """
    # Algorithm:
    #
    # For each (H, W) location i
    #   Generate A anchors centered on cell i
    # Filter out-of-image anchors
    # Measure the overlaps between anchors and gt_boxes
    # Assign labels, deltas, inside_weights, outside_weights for each anchor
    assert scores.size(0) == 1, "Single batch only."
    height, width = scores.shape[-2:]

    # Enumerate all shifts (NOTE: torch.meshgrid is different from np.meshgrid)
    shift_x = torch.arange(0, width) * self.feat_stride
    shift_y = torch.arange(0, height) * self.feat_stride
    shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
    shift_x, shift_y = shift_x.contiguous(), shift_y.contiguous()
    # One (x, y, x, y) shift per feature-map cell.
    shifts = torch.stack((shift_x.view(-1), shift_y.view(-1),
                          shift_x.view(-1), shift_y.view(-1)), dim=1)
    shifts = shifts.type_as(gt_boxes)

    # Enumerate all shifted anchors:
    # Add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
    # shifted anchors (K, A, 4)
    # Reshape to (K * A, 4) shifted anchors
    A = self.num_anchors
    K = shifts.size(0)
    self.anchors = self.anchors.type_as(gt_boxes)
    anchors = self.anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(
        1, 0, 2)
    anchors = anchors.view(K * A, 4)

    # Only keep anchors inside the image
    keep = torch.nonzero((anchors[:, 0] >= 0) & (anchors[:, 1] >= 0) &
                         (anchors[:, 2] < img_info[1]) &
                         (anchors[:, 3] < img_info[0]))[:, 0]
    anchors = anchors[keep]

    # IoU matrix: rows = kept anchors, cols = gt boxes.
    overlaps = bbox_overlaps(anchors, gt_boxes[:, :4])
    if "DEBUG" in os.environ:
        # Deterministic numpy path for debugging (matches argmax tie-breaks).
        import numpy as np
        argmax_overlaps = np.argmax(overlaps.cpu(), axis=1)
        max_overlaps = torch.from_numpy(
            np.max(overlaps.cpu().numpy(), axis=1))
        gt_max_overlaps = torch.from_numpy(
            np.max(overlaps.cpu().numpy(), axis=0)).type_as(overlaps)
        gt_argmax_overlaps = np.nonzero(overlaps == gt_max_overlaps)[:, 0]
    else:
        max_overlaps, argmax_overlaps = overlaps.max(dim=1)
        gt_max_overlaps = overlaps.max(dim=0)[0]
        gt_argmax_overlaps = torch.nonzero(
            overlaps == gt_max_overlaps)[:, 0]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # The anchors which satisfied both positive and negative conditions
    # will be as positive
    labels = gt_boxes.new(len(keep)).fill_(-1)
    # bg labels: below threshold IOU
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # Subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = torch.nonzero(labels == 1)[:, 0]
    if len(fg_inds) > num_fg:
        if "DEBUG" in os.environ:
            # Deterministic subsampling under DEBUG.
            disable_inds = fg_inds[:len(fg_inds) - num_fg]
        else:
            disable_inds = torch_rand_choice(fg_inds,
                                             len(fg_inds) - num_fg)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum(labels == 1)
    bg_inds = torch.nonzero(labels == 0)[:, 0]
    if len(bg_inds) > num_bg:
        if "DEBUG" in os.environ:
            disable_inds = bg_inds[:len(bg_inds) - num_bg]
        else:
            disable_inds = torch_rand_choice(bg_inds,
                                             len(bg_inds) - num_bg)
        labels[disable_inds] = -1

    # Regression targets: each anchor regresses to its best-matching gt.
    deltas = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])
    inside_weights = gt_boxes.new(deltas.shape).zero_()
    inside_weights[labels == 1] = gt_boxes.new(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
    outside_weights = gt_boxes.new(deltas.shape).zero_()
    # Normalize by the number of sampled (fg + bg) examples.
    num_examples = torch.sum(labels >= 0)
    outside_weights[labels == 1] = gt_boxes.new(1, 4).fill_(1) / num_examples
    outside_weights[labels == 0] = gt_boxes.new(1, 4).fill_(1) / num_examples

    def map2origin(data, count=K * A, inds=keep, fill=0):
        """Map to original set (scatter kept-anchor values back to all K*A)."""
        shape = (count, ) + data.shape[1:]
        origin = torch.empty(shape).fill_(fill).type_as(gt_boxes)
        origin[inds] = data
        return origin

    labels = map2origin(labels, fill=-1)
    deltas = map2origin(deltas)
    inside_weights = map2origin(inside_weights)
    outside_weights = map2origin(outside_weights)

    # Reshape to the layout expected by the RPN loss.
    labels = labels.view(1, height, width, A).permute(0, 3, 1, 2)
    labels = labels.contiguous().view(1, 1, A * height, width)
    deltas = deltas.view(1, height, width, A * 4).permute(0, 3, 1, 2)
    inside_weights = inside_weights.view(1, height, width,
                                         A * 4).permute(0, 3, 1, 2)
    outside_weights = outside_weights.view(1, height, width,
                                           A * 4).permute(0, 3, 1, 2)
    return labels, deltas, inside_weights, outside_weights
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Decodes per-FPN-level classification probabilities and box regressions
    into detections, then runs per-class NMS and keeps the top-scoring
    detections overall.

    Returns:
        cls_boxes (list): per-class list of N x 5 arrays
            ([x1, y1, x2, y2, score]); index 0 (background) stays empty.
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    # Previous Caffe2 workspace path, kept for reference:
    # cls_probs, box_preds = [], []
    # for lvl in range(k_min, k_max + 1):
    #     suffix = 'fpn{}'.format(lvl)
    #     cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
    #     box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    # for k, v in inputs.items():
    #     workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    # workspace.RunNet(model.net.Proto().name)
    # cls_probs = workspace.FetchBlobs(cls_probs)
    # box_preds = workspace.FetchBlobs(box_preds)
    return_dict = model(**inputs)
    cls_probs = return_dict['cls_score']
    box_preds = return_dict['bbox_pred']

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Split the A*C channel dim into (A, C): [1, A, C, H, W]
        cls_prob = cls_prob.reshape((  # [1, 9, 80, 112, 160]
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((  # 1 9 4 112 160
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # Drop the background channel.
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.data.cpu().numpy().squeeze().ravel()
        box_pred = box_pred.data.cpu().numpy()  #.squeeze()
        # print(box_pred.shape, box_pred.squeeze().shape)
        # 1 9 4 112 60 -> 9 4 112 160

        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl 7 which has small spatial resolution, we
        # take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0  # 0.05 or 0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds],
            -pre_nms_topn)[-pre_nms_topn:]
        # select thos better than threshold and top 10000
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose(
        )  #to transform index according to data shape
        # before ravel: inds 5d shape (160, 112, 80, 10, 5)
        #     from (10, 80, 112, 160) torch.Size([1, 9, 80, 112, 160])
        # after ravel: inds 5d shape (10, 5)
        #     from (10,) torch.Size([1, 9, 80, 112, 160])
        # inds_5d: ?, anchor_id, classes, y, x
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Rebuild anchor boxes at the selected cells: cell offset * stride
        # plus the level's cell anchor.
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by
    # class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()
    return cls_boxes
    #'''
    '''return None
    '''
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None,
                   region_box=None):
    """Prepare the bbox for testing.

    Runs `model` on one image and decodes class scores and class-tiled
    predicted boxes. When `region_box` is given, `im` is treated as a crop
    of a larger image: proposals touching the crop border get their scores
    zeroed, and boxes are shifted back into full-image coordinates.

    Arguments:
        model: detection model (callable; returns a dict of output tensors)
        im (ndarray): color image (or image crop) to test
        target_scale (int): target size of the shorter image side
        target_max_size (int): cap on the longer image side
        boxes (ndarray or None): R x 4 proposals, or None when using RPN
        region_box (sequence or None): (x1, y1, ...) offset of the crop
            within the full image, or None for a whole image

    Returns:
        scores (ndarray): R x K class scores
        pred_boxes (ndarray): R x 4*K predicted boxes
        im_scale (float): scale used for the input blob
        blob_conv: backbone feature blob(s)
    """
    h = im.shape[0]
    w = im.shape[1]
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # Identify duplicate feature ROIs so features are only computed on the
    # unique subset (only relevant for precomputed-proposal models).
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    scores = return_dict['cls_score'].data.cpu().numpy().squeeze()
    if cfg.MODEL.FASTER_RCNN:
        rois = return_dict['rois'].data.cpu().numpy()
        # unscale back to raw image space (column 0 is the batch index)
        # BUG FIX: was `region_box == None`, which compares element-wise
        # when region_box is an ndarray; use an identity check.
        if region_box is None:
            boxes = rois[:, 1:5] / im_scale
        else:
            boxes = rois[:, 1:5] / im_scale
            # Suppress detections that touch the crop border: they are
            # likely truncated objects that a neighboring crop sees fully.
            edge_index = (boxes[:, 0] < 2) | (boxes[:, 1] < 2) | (
                boxes[:, 2] > w - 2) | (boxes[:, 3] > h - 2)
            # BUG FIX: was `[0 for i in range(13)]`, which hard-coded a
            # 13-class model; broadcasting 0 zeroes every class column.
            scores[edge_index] = 0
            # Shift boxes from crop coordinates to full-image coordinates.
            x_1 = region_box[0]
            y_1 = region_box[1]
            boxes[:, (0, 2)] = boxes[:, (0, 2)] + x_1 - 1
            boxes[:, (1, 3)] = boxes[:, (1, 3)] + y_1 - 1

    # cls prob (activations after softmax)
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = return_dict['bbox_pred'].data.cpu().numpy().squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # (legacy) Optionally normalize targets by a precomputed mean
            # and stdev.
            # BUG FIX: box_deltas is a numpy array; ndarray.view() takes a
            # dtype, not a shape, so .view(-1, 4) raised TypeError.
            box_deltas = box_deltas.reshape(-1, 4) \
                * cfg.TRAIN.BBOX_NORMALIZE_STDS \
                + cfg.TRAIN.BBOX_NORMALIZE_MEANS
        pred_boxes = box_utils.bbox_transform(boxes, box_deltas,
                                              cfg.MODEL.BBOX_REG_WEIGHTS)
        # NOTE: clipping to the crop is intentionally disabled here because
        # boxes may already be in full-image coordinates:
        # pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale, return_dict['blob_conv']
def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
    """Generalized R-CNN forward pass with optional IoU-debug JSON dumps.

    Training: returns a dict with 'losses' and 'metrics'.
    Testing: returns a dict with 'rois', 'cls_score', 'bbox_pred' and
    'blob_conv'; when cfg.TEST.IOU_OUT is set, also writes a series of
    debug JSON files comparing stage-1 boxes against a re-run second stage.
    """
    im_data = data
    if self.training:
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

    device_id = im_data.get_device()

    return_dict = {}  # A dict to collect return variables

    blob_conv = self.Conv_Body(im_data)

    rpn_ret = self.RPN(blob_conv, im_info, roidb)

    # rpn proposals
    # if self.training:
    #     # can be used to infer fg/bg ratio
    #     return_dict['rois_label'] = rpn_ret['labels_int32']

    rois_certification = False
    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv`
        # may include extra blobs that are used for RPN proposals, but not
        # for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]

    if not self.training:
        return_dict['blob_conv'] = blob_conv

    if rois_certification:
        # Debug path (disabled above): push the raw RPN rois through the
        # box head once to inspect scores/deltas.
        lvl_min = cfg.FPN.ROI_MIN_LEVEL
        lvl_max = cfg.FPN.ROI_MAX_LEVEL
        test_rpn_ret = {'rois': rpn_ret['rois']}
        lvls = fpn_utils.map_rois_to_fpn_levels(test_rpn_ret['rois'],
                                                lvl_min, lvl_max)
        rois_idx_order = np.empty((0, ))
        test_rois = test_rpn_ret['rois']
        for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
            idx_lvl = np.where(lvls == lvl)[0]
            rois_lvl = test_rois[idx_lvl, :]
            rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
            test_rpn_ret['rois_fpn{}'.format(lvl)] = rois_lvl
        rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32,
                                                             copy=False)
        test_rpn_ret['rois_idx_restore_int32'] = rois_idx_restore
        test_feat = self.Box_Head(blob_conv, test_rpn_ret)
        test_cls_score, test_bbox_pred = self.Box_Outs(test_feat)
        test_cls_score = test_cls_score.data.cpu().numpy().squeeze()
        test_bbox_pred = test_bbox_pred.data.cpu().numpy().squeeze()

    if not cfg.MODEL.RPN_ONLY:
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
            # bbox proposals
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret)
        cls_score, bbox_pred = self.Box_Outs(box_feat)
    else:
        # TODO: complete the returns for RPN only situation
        pass

    # Loss computation starts here
    if self.training:
        return_dict['losses'] = {}
        return_dict['metrics'] = {}
        # rpn loss
        rpn_kwargs.update(
            dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                 if (k.startswith('rpn_cls_logits')
                     or k.startswith('rpn_bbox_pred'))))
        loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
            **rpn_kwargs)
        if cfg.FPN.FPN_ON:
            for i, lvl in enumerate(
                    range(cfg.FPN.RPN_MIN_LEVEL,
                          cfg.FPN.RPN_MAX_LEVEL + 1)):
                return_dict['losses']['loss_rpn_cls_fpn%d' %
                                      lvl] = loss_rpn_cls[i]
                return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                      lvl] = loss_rpn_bbox[i]
        else:
            return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
            return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox
        # bbox loss
        loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
            cls_score, bbox_pred, rpn_ret['labels_int32'],
            rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
            rpn_ret['bbox_outside_weights'])
        return_dict['losses']['loss_cls'] = loss_cls
        return_dict['losses']['loss_bbox'] = loss_bbox
        return_dict['metrics']['accuracy_cls'] = accuracy_cls

        if cfg.MODEL.MASK_ON:
            if getattr(self.Mask_Head, 'SHARE_RES5', False):
                mask_feat = self.Mask_Head(
                    res5_feat,
                    rpn_ret,
                    roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
            else:
                mask_feat = self.Mask_Head(blob_conv, rpn_ret)
            mask_pred = self.Mask_Outs(mask_feat)
            # return_dict['mask_pred'] = mask_pred
            # mask loss
            loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                mask_pred, rpn_ret['masks_int32'])
            return_dict['losses']['loss_mask'] = loss_mask

        if cfg.MODEL.KEYPOINTS_ON:
            if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                # No corresponding keypoint head implemented yet (Neither in
                # Detectron). Also, rpn need to generate the label
                # 'roi_has_keypoints_int32'
                kps_feat = self.Keypoint_Head(
                    res5_feat,
                    rpn_ret,
                    roi_has_keypoints_int32=rpn_ret[
                        'roi_has_keypoint_int32'])
            else:
                kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
            kps_pred = self.Keypoint_Outs(kps_feat)
            # return_dict['keypoints_pred'] = kps_pred
            # keypoints loss
            if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'])
            else:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'],
                    rpn_ret['keypoint_loss_normalizer'])
            return_dict['losses']['loss_kps'] = loss_keypoints

        # pytorch0.4 bug on gathering scalar(0-dim) tensors
        for k, v in return_dict['losses'].items():
            return_dict['losses'][k] = v.unsqueeze(0)
        for k, v in return_dict['metrics'].items():
            return_dict['metrics'][k] = v.unsqueeze(0)
    else:
        # Testing
        return_dict['rois'] = rpn_ret['rois']

        import json
        if cfg.TEST.IOU_OUT:
            # The rois can be taken directly from rpn_ret.
            with open(
                    "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/raw_roi.json",
                    'w') as f:
                json.dump((return_dict['rois'][:, 1:] /
                           im_info.numpy()[0][2]).tolist(), f)

            # In FPN mode the distributed scores come from a
            # collect_and_distribute... function (dumped to json there), so
            # only the RPN_ONLY mode is handled here.
            if not cfg.FPN.FPN_ON:
                with open(
                        "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/rois_score.json",
                        'w') as f:
                    score_2_json = []
                    for item in rpn_ret['rpn_roi_probs']:
                        score_2_json.append(item.item())
                    json.dump(score_2_json, f)

            # Second fast-head stage: first compute pred boxes from the rois
            # and bbox deltas.
            if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
                lvl_min = cfg.FPN.ROI_MIN_LEVEL
                lvl_max = cfg.FPN.ROI_MAX_LEVEL
                if cfg.FPN.FPN_ON:
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    # NOTE: bbox_pred is rebound to a numpy array from here on.
                    bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                    box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                    shift_boxes = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    shift_boxes = box_utils.clip_tiled_boxes(
                        shift_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])
                    num_classes = cfg.MODEL.NUM_CLASSES

                    # Collect stage-1 boxes above the score threshold,
                    # remembering (in dets_cls) which flat indices belong to
                    # each class.
                    onecls_pred_boxes = []
                    onecls_score = []
                    dets_cls = {}
                    count = 0
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                        score_j = cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_score += score_j.tolist()
                        dets_cls.update({j: []})
                        for k in range(len(boxes_j.tolist())):
                            dets_cls[j].append(count)
                            count += 1
                    assert count == len(onecls_pred_boxes)

                    stage2_rois_score = np.array(onecls_score,
                                                 dtype=np.float32)
                    stage2_rois = np.array(onecls_pred_boxes,
                                           dtype=np.float32)

                    # Redistribute stage2_rois using fpn_utils module
                    # provided functions; calculate by formula
                    cls_tracker = {}
                    if not stage2_rois.tolist():
                        stage1_pred_iou = stage2_rois_score.tolist()
                        stage2_final_boxes = np.empty((0, ))
                        stage2_final_score = np.empty((0, ))
                        logger.info("Detections above threshold is null.")
                    else:
                        alter_rpn = {}
                        unresize_stage2_rois = stage2_rois * im_scale
                        # unresize_stage2_rois = np.concatenate(
                        #     (unresize_stage2_rois, unresized_rois[:, 1:5]))
                        lvls = fpn_utils.map_rois_to_fpn_levels(
                            unresize_stage2_rois, lvl_min, lvl_max)
                        # TAG: We might need to visualize "stage2_rois" to
                        # make sure.
                        rois_idx_order = np.empty((0, ))
                        dummy_batch = np.zeros(
                            (unresize_stage2_rois.shape[0], 1),
                            dtype=np.float32)
                        alter_rpn["rois"] = np.hstack(
                            (dummy_batch,
                             unresize_stage2_rois)).astype(np.float32,
                                                           copy=False)
                        # alter_rpn['rois'] = np.concatenate(
                        #     (alter_rpn['rois'], unresized_rois))
                        for output_idx, lvl in enumerate(
                                range(lvl_min, lvl_max + 1)):
                            idx_lvl = np.where(lvls == lvl)[0]
                            rois_lvl = unresize_stage2_rois[idx_lvl, :]
                            rois_idx_order = np.concatenate(
                                (rois_idx_order, idx_lvl))
                            _ = np.zeros((rois_lvl.shape[0], 1),
                                         dtype=np.float32)
                            alter_rpn['rois_fpn{}'.format(lvl)] = np.hstack(
                                (_, rois_lvl)).astype(np.float32,
                                                      copy=False)
                        rois_idx_restore = np.argsort(
                            rois_idx_order).astype(np.int32, copy=False)
                        alter_rpn['rois_idx_restore_int32'] = rois_idx_restore

                        # Go through 2nd stage of FPN and fast_head
                        stage2_feat = self.Box_Head(blob_conv, alter_rpn)
                        stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                            stage2_feat)

                        # Transform shift value to original one to get final
                        # pred boxes coordinates
                        stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                        ).squeeze()
                        stage2_cls_score = stage2_cls_score.data.cpu().numpy()
                        stage2_box_deltas = stage2_bbox_pred.reshape(
                            [-1, bbox_pred.shape[-1]])

                        # Add some variance to box delta
                        if cfg.FAST_RCNN.STAGE1_TURBULENCE:
                            import random
                            for i in range(len(stage2_box_deltas)):
                                for j in range(len(stage2_box_deltas[i])):
                                    stage2_box_deltas[i][j] *= random.uniform(
                                        0.9, 1.1)

                        stage2_cls_out = box_utils.bbox_transform(
                            stage2_rois, stage2_box_deltas,
                            cfg.MODEL.BBOX_REG_WEIGHTS)
                        stage2_cls_out = box_utils.clip_tiled_boxes(
                            stage2_cls_out,
                            im_info.data.cpu().numpy().squeeze()[0:2])
                        onecls_pred_boxes = []
                        onecls_score = []
                        count = 0
                        for j in range(1, num_classes):
                            inds = np.where(
                                stage2_cls_score[:, j] >
                                cfg.TEST.SCORE_THRESH)[0]
                            boxes_j = stage2_cls_out[inds, j * 4:(j + 1) * 4]
                            score_j = stage2_cls_score[inds, j]
                            dets_j = np.hstack(
                                (boxes_j,
                                 score_j[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
                            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                            boxes_j = boxes_j[keep]
                            score_j = score_j[keep]
                            # Record which class each kept box belongs to
                            onecls_score += score_j.tolist()
                            onecls_pred_boxes += boxes_j.tolist()
                            for k in range(len(score_j)):
                                cls_tracker.update({count: j})
                                count += 1
                        assert count == len(onecls_score)

                        stage2_final_boxes = np.array(onecls_pred_boxes,
                                                      dtype=np.float32)
                        stage2_final_score = np.array(onecls_score,
                                                      dtype=np.float32)
                        inds = np.where(stage2_final_score > 0.3)[0]

                        # Filtered by keep index...
                        preserve_stage2_final_boxes = copy.deepcopy(
                            stage2_final_boxes)
                        preserve_stage2_final_score = copy.deepcopy(
                            stage2_final_score)
                        stage2_final_boxes = stage2_final_boxes[inds]
                        stage2_final_score = stage2_final_score[inds]

                        # if nothing left after 0.3 threshold filter, reserve
                        # whole boxes to original.
                        if stage2_final_boxes.size == 0:
                            lower_inds = np.where(
                                preserve_stage2_final_score > 0.1)[0]
                            stage2_final_boxes = preserve_stage2_final_boxes[
                                lower_inds]
                            stage2_final_score = preserve_stage2_final_score[
                                lower_inds]
                        else:
                            del preserve_stage2_final_boxes
                            del preserve_stage2_final_score

                        # if all boxes are clsfied into bg class.
                        if stage2_final_boxes.size == 0:
                            stage1_pred_iou = stage2_rois_score.tolist()
                            stage2_final_boxes = np.empty((0, ))
                            stage2_final_score = np.empty((0, ))
                            logger.info("Detections above threshold is null.")
                        else:
                            # Restore stage2_pred_boxes to match the index
                            # with stage2_rois; Compute IOU between
                            # final_boxes and stage2_rois, one by one
                            flag = "cross_product"
                            if flag == "element_wise":
                                if stage2_final_boxes.shape[
                                        0] == stage2_rois.shape[0]:
                                    restored_stage2_final_boxes = \
                                        stage2_final_boxes[rois_idx_restore]
                                    stage1_pred_iou = []
                                    for ind, item in enumerate(stage2_rois):
                                        stage1 = np.array(
                                            item, dtype=np.float32).reshape(
                                                (1, 4))
                                        stage2 = np.array(
                                            restored_stage2_final_boxes[ind],
                                            dtype=np.float32).reshape((1, 4))
                                        iou = box_utils.bbox_overlaps(
                                            stage1, stage2)
                                        stage1_pred_iou.append(
                                            iou.squeeze().item())
                                else:
                                    logger.info(
                                        "Mistake while processing {}".format(
                                            str(im_info)))
                            elif flag == "cross_product":
                                iou = box_utils.bbox_overlaps(
                                    stage2_rois, stage2_final_boxes)
                                stage1_pred_iou = iou.max(axis=1).tolist()

                        # stage1_pred is another name of stage2_rois
                        assert len(stage1_pred_iou) == len(stage2_rois)

                    if cfg.FAST_RCNN.IOU_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)
                    elif cfg.FAST_RCNN.SCORE_NMS:
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_score.json",
                                "w") as f:
                            json.dump(stage2_rois_score.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_score.json",
                                "w") as f:
                            json.dump(stage2_final_score.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_rois.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_iou.json",
                                'w') as f:
                            json.dump(stage1_pred_iou, f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_pred_boxes.json",
                                'w') as f:
                            json.dump(stage2_final_boxes.tolist(), f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_dets_cls.json",
                                'w') as f:
                            json.dump(dets_cls, f)
                        with open(
                                "/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_cls_tracker.json",
                                'w') as f:
                            json.dump(cls_tracker, f)
                else:
                    # Non-FPN path: run the box head a second time on the
                    # stage-1 refined boxes.
                    im_scale = im_info.data.cpu().numpy().squeeze()[2]
                    rois = rpn_ret['rois'][:, 1:5] / im_scale
                    # unscale back to raw image space
                    box_deltas = bbox_pred.data.cpu().numpy().squeeze()
                    fast_stage1_score = cls_score.data.cpu().numpy().squeeze()
                    box_deltas = box_deltas.reshape([-1, bbox_pred.shape[-1]])
                    stage2_rois = box_utils.bbox_transform(
                        rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_rois = box_utils.clip_tiled_boxes(
                        stage2_rois,
                        im_info.data.cpu().numpy().squeeze()[0:2])
                    num_classes = cfg.MODEL.NUM_CLASSES
                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_rois[inds, j * 4:(j + 1) * 4]
                        score_j = fast_stage1_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_rois = np.array(onecls_pred_boxes,
                                           dtype=np.float32)
                    stage2_rois_score = np.array(onecls_cls_score,
                                                 dtype=np.float32)
                    assert len(stage2_rois) == len(stage2_rois_score)

                    # Send stage2 rois to next stage fast head, do ROI ALIGN
                    # again to modify rpn_ret['rois'], rpn_ret['rpn_rois']
                    # and rpn['rois_rpn_score']
                    rpn_ret['rois'] = stage2_rois
                    rpn_ret['rpn_rois'] = stage2_rois
                    rpn_ret['rpn_roi_probs'] = stage2_rois_score
                    stage2_box_feat = self.Box_Head(blob_conv, rpn_ret)
                    stage2_cls_score, stage2_bbox_pred = self.Box_Outs(
                        stage2_box_feat)

                    stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy(
                    ).squeeze()
                    stage2_bbox_pred = stage2_bbox_pred.reshape(
                        [-1, bbox_pred.shape[-1]])

                    stage2_cls_pred_boxes = box_utils.bbox_transform(
                        stage2_rois, stage2_bbox_pred,
                        cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_cls_pred_boxes = box_utils.clip_tiled_boxes(
                        stage2_cls_pred_boxes,
                        im_info.data.cpu().numpy().squeeze()[0:2])

                    onecls_pred_boxes = []
                    onecls_cls_score = []
                    for j in range(1, num_classes):
                        inds = np.where(
                            stage2_cls_score[:, j] >
                            cfg.TEST.SCORE_THRESH)[0]
                        if len(inds) != 0:
                            print("KKKKK")
                        boxes_j = stage2_cls_pred_boxes[inds,
                                                        j * 4:(j + 1) * 4]
                        score_j = stage2_cls_score[inds, j]
                        onecls_pred_boxes += boxes_j.tolist()
                        onecls_cls_score += score_j.tolist()

                    stage2_bbox_pred = np.array(onecls_pred_boxes,
                                                dtype=np.float32)
                    stage2_bbox_pred_score = np.array(onecls_cls_score,
                                                      dtype=np.float32)
        # get stage2 pred_boxes here
        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred

    return return_dict
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image.

    Runs the (already-constructed) Caffe2 ``model`` net on ``im``, decodes the
    per-FPN-level classification/box-regression blobs against the supplied
    ``anchors``, applies per-class NMS, and returns the top detections.

    Args:
        model: Caffe2 model wrapper; ``model.net`` is run via the global
            ``workspace`` (this function mutates global workspace blobs).
        im: input image array; ``im.shape` is used for box clipping.
        anchors: dict mapping FPN level -> cell-anchor array, indexed by
            ``anchor_ids`` below. Presumably (A, 4) xyxy offsets per level —
            TODO confirm against the anchor generator.

    Returns:
        (boxes, scores, classes): arrays of xyxy boxes, their scores, and
        1-based class labels, truncated to ``cfg.TEST.DETECTIONS_PER_IM``.
    """
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    # A = number of anchors per spatial cell (scales per octave x aspect ratios).
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    # Blob names to fetch after the net runs, one pair per FPN level.
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    # Feed inputs into the global Caffe2 workspace and run the net.
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)
    # im_info row is [height, width, scale]; index 2 is the resize scale.
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array; stride is the feature-map downsampling
        # factor at this pyramid level.
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        # Reshape flat channel dims into explicit (N, A, num_cls, H, W) /
        # (N, A, 4, H, W) layouts so indices can be recovered per anchor.
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            # Drop the background class (index 0) when softmax output is used.
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        # Keep only the top pre_nms_topn scores; argpartition avoids a full sort.
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        # Map flat indices back to 5-D (n, anchor, class, y, x) coordinates.
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        # Shift each level's cell anchors to the feature cell location,
        # expressed in input-image coordinates (feature coords * stride).
        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            # Class-specific regression: pick the 4 deltas for each box's class.
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        # Apply the regression deltas (unless box regression is disabled).
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        # Undo the image resize scale, then clip to the original image bounds.
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        # Bucket detections by 1-based class label (classes above are 0-based).
        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        # Row layout: [x0, y0, x1, y1, score, class].
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again (descending by score) and cap the per-image detections.
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
    """Run the (optionally cascaded) Faster/Mask R-CNN forward pass.

    Computes backbone features, RPN proposals, and the first-stage box head.
    In training mode it also accumulates RPN/box (and optional mask/keypoint)
    losses into ``return_dict['losses']`` and ``return_dict['metrics']``.
    When ``cfg.FAST_RCNN.USE_CASCADE`` is on, two further cascade stages
    (Box_Head_2/Box_Outs_2 and Box_Head_3/Box_Outs_3) are run, each refining
    the previous stage's boxes into new proposals and adding its losses
    in-place to the stage-0 loss entries.

    Args:
        data: input image tensor for the conv backbone.
        im_info: per-image [height, width, scale] info; ``im_info[e][2]`` is
            used as the resize scale during cascade box refinement.
        roidb: serialized roidb entries (training only; deserialized below).
        **rpn_kwargs: extra targets forwarded to ``generic_rpn_losses``.

    Returns:
        dict with rois/cls_score/bbox_pred (suffixed ``_0``/``_1``/``_2`` in
        cascade mode), plus ``losses``/``metrics`` when training and
        ``blob_conv`` when evaluating.
    """
    im_data = data
    if self.training:
        # roidb entries arrive serialized (one per image); unpack them.
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))
    device_id = im_data.get_device()
    return_dict = {}  # A dict to collect return variables
    blob_conv = self.Conv_Body(im_data)
    rpn_ret = self.RPN(blob_conv, im_info, roidb)
    # if self.training:
    #     # can be used to infer fg/bg ratio
    #     return_dict['rois_label'] = rpn_ret['labels_int32']
    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
        # extra blobs that are used for RPN proposals, but not for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]
    if not self.training:
        return_dict['blob_conv'] = blob_conv
    if not cfg.MODEL.RPN_ONLY:
        # Stage-0 box head; SHARE_RES5 also yields the res5 feature reused by
        # mask/keypoint heads during training.
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret)
        cls_score, bbox_pred = self.Box_Outs(box_feat)
    else:
        # TODO: complete the returns for RPN only situation
        pass
    if self.training:
        return_dict['losses'] = {}
        return_dict['metrics'] = {}
        # rpn loss: forward the RPN logits/deltas alongside any provided targets.
        rpn_kwargs.update(
            dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                 if (k.startswith('rpn_cls_logits')
                     or k.startswith('rpn_bbox_pred'))))
        loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
            **rpn_kwargs)
        if cfg.FPN.FPN_ON:
            # One loss entry per FPN level.
            for k, lvl in enumerate(
                    range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)):
                return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] = loss_rpn_cls[k]
                return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] = loss_rpn_bbox[k]
        else:
            return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
            return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox
        # bbox loss (stage 0 of the cascade).
        loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
            cls_score, bbox_pred, rpn_ret['labels_int32'],
            rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
            rpn_ret['bbox_outside_weights'], stage=0)
        return_dict['losses']['loss_cls'] = loss_cls
        return_dict['losses']['loss_bbox'] = loss_bbox
        return_dict['metrics']['accuracy_cls'] = accuracy_cls
        if cfg.MODEL.MASK_ON:
            if getattr(self.Mask_Head, 'SHARE_RES5', False):
                mask_feat = self.Mask_Head(
                    res5_feat, rpn_ret,
                    roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
            else:
                mask_feat = self.Mask_Head(blob_conv, rpn_ret)
            mask_pred = self.Mask_Outs(mask_feat)
            # return_dict['mask_pred'] = mask_pred
            # mask loss
            loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                mask_pred, rpn_ret['masks_int32'])
            return_dict['losses']['loss_mask'] = loss_mask
        if cfg.MODEL.KEYPOINTS_ON:
            if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                # No corresponding keypoint head implemented yet (Neither in Detectron)
                # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                kps_feat = self.Keypoint_Head(
                    res5_feat, rpn_ret,
                    roi_has_keypoints_int32=rpn_ret['roi_has_keypoint_int32'])
            else:
                kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
            kps_pred = self.Keypoint_Outs(kps_feat)
            # return_dict['keypoints_pred'] = kps_pred
            # keypoints loss
            if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'])
            else:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'],
                    rpn_ret['keypoint_loss_normalizer'])
            return_dict['losses']['loss_kps'] = loss_keypoints
        # pytorch0.4 bug on gathering scalar(0-dim) tensors
        for k, v in return_dict['losses'].items():
            return_dict['losses'][k] = v.unsqueeze(0)
        for k, v in return_dict['metrics'].items():
            return_dict['metrics'][k] = v.unsqueeze(0)
    if not cfg.FAST_RCNN.USE_CASCADE:
        return_dict['rois'] = rpn_ret['rois']
        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
    else:
        # Cascade mode: suffix outputs with the stage index; detach so later
        # stages do not backprop into earlier heads through these entries.
        return_dict['rois' + '_{}'.format(0)] = rpn_ret['rois']
        return_dict['cls_score' + '_{}'.format(0)] = cls_score.detach()
        return_dict['bbox_pred' + '_{}'.format(0)] = bbox_pred.detach()
    if cfg.FAST_RCNN.USE_CASCADE:
        # Two refinement stages: i takes values 1 and 2.
        for i in range(2):
            i += 1
            pre_stage_name = '_{}'.format(i - 1)
            cls_score_cpu = cls_score.data.cpu().numpy()
            import utils.boxes as box_utils
            bbox_pred_cpu = bbox_pred.reshape(
                [-1, bbox_pred.shape[-1]]).data.cpu().numpy().squeeze()
            # Copy so the stored previous-stage rois are not mutated in place.
            rois = deepcopy(return_dict['rois' + pre_stage_name])
            # Only the class-agnostic regression path is supported below; the
            # class-specific branch is kept but currently unreachable.
            assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG is True
            if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Pick the highest-scoring foreground class per roi.
                cls_loc = np.argmax(cls_score_cpu[:, 1:], axis=1) + 1
                cls_loc = np.reshape(cls_loc, (cls_loc.shape[0], 1))
                # Based on scores, we can select transformed rois
                scores = np.zeros((cls_score_cpu.shape[0], 1))
                for k in range(len(cls_loc)):
                    scores[k] = cls_score_cpu[k, cls_loc[k]]
            # Column 0 of rois is the batch index; group rois per image.
            batch_inds = rois[:, 0]
            uni_inds = np.unique(batch_inds)
            # WE suppose the WIDTH of image is EQUAL to its HEIGHT
            # We also provide an example to show how to perform the operation batch-wise
            # Scale forward: divide each image's rois by its resize scale so the
            # box transform operates in raw-image coordinates.
            batch_se = []
            for e in range(len(uni_inds)):
                id_min = min(np.where(batch_inds == uni_inds[e])[0])
                id_max = max(np.where(batch_inds == uni_inds[e])[0])
                rois[id_min:id_max + 1, 1:5] /= im_info[e][2]
                batch_se.append([id_min, id_max])
            pred_boxes = box_utils.bbox_transform(
                rois[:, 1:5], bbox_pred_cpu,
                cfg.CASCADE_RCNN.BBOX_REG_WEIGHTS[i])
            # Scale back to network-input coordinates and clip per image.
            for e in range(len(uni_inds)):
                id_min = batch_se[e][0]
                id_max = batch_se[e][1]
                pred_boxes[id_min:id_max + 1] *= im_info[e][2]
                pred_boxes[id_min:id_max + 1] = box_utils.clip_tiled_boxes(
                    pred_boxes[id_min:id_max + 1], im_info[e][0:2])
            cfg_key = 'TRAIN' if self.training else 'TEST'
            min_size = cfg[cfg_key].RPN_MIN_SIZE
            if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Cannot use for loop here which may cause "illegal memory access"
                # Thanks to Chen-Wei Xie !
                # Vectorized gather of each roi's 4 class-specific columns.
                rows = pred_boxes.shape[0]
                b3 = cls_loc * 4 + np.array([0, 1, 2, 3])
                b4 = np.array(range(rows))
                c = pred_boxes[np.repeat(b4, 4), b3.flatten()]
                proposals = np.reshape(c, (-1, 4))
            else:
                # Class-agnostic layout: columns 4:8 hold the foreground boxes.
                proposals = pred_boxes[:, 4:8]
            keep = _filter_boxes(proposals, min_size, im_info[0])
            proposals = proposals[keep, :]
            batch_inds = batch_inds[keep]
            batch_inds = np.reshape(batch_inds, [len(batch_inds), 1])
            # Re-attach the batch index column: rows become [b, x1, y1, x2, y2].
            proposals = np.concatenate((batch_inds, proposals), axis=1)
            from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
            # NOTE(review): the op is re-created (and assigned onto self) on
            # every stage of every forward call — presumably intentional to
            # reset per-stage state; confirm.
            self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp()
            self.CollectAndDistributeFpnRpnProposals.training = self.training
            # proposals.astype('float32')
            blobs_out = self.CollectAndDistributeFpnRpnProposals(
                proposals, roidb, im_info, stage=i)
            # Update rpn_ret: keep only RPN-related blobs, then overlay the
            # freshly distributed proposal blobs for this stage.
            new_rpn_ret = {}
            for key, value in rpn_ret.items():
                if 'rpn' in key:
                    new_rpn_ret[key] = value
            new_rpn_ret.update(blobs_out)
            if not self.training:
                return_dict['blob_conv'] = blob_conv
            if not cfg.MODEL.RPN_ONLY:
                # Each cascade stage has its own head/output modules.
                if i == 1:
                    if cfg.MODEL.SHARE_RES5 and self.training:
                        box_feat, res5_feat = self.Box_Head_2(
                            blob_conv, new_rpn_ret)
                    else:
                        box_feat = self.Box_Head_2(blob_conv, new_rpn_ret)
                    cls_score, bbox_pred = self.Box_Outs_2(box_feat)
                elif i == 2:
                    if cfg.MODEL.SHARE_RES5 and self.training:
                        box_feat, res5_feat = self.Box_Head_3(
                            blob_conv, new_rpn_ret)
                    else:
                        box_feat = self.Box_Head_3(blob_conv, new_rpn_ret)
                    cls_score, bbox_pred = self.Box_Outs_3(box_feat)
            if self.training:
                # rpn loss (recomputed with this stage's blobs and added onto
                # the stage-0 entries in-place).
                rpn_kwargs.update(
                    dict((k, new_rpn_ret[k]) for k in new_rpn_ret.keys()
                         if (k.startswith('rpn_cls_logits')
                             or k.startswith('rpn_bbox_pred'))))
                loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                    **rpn_kwargs)
                if cfg.FPN.FPN_ON:
                    for k, lvl in enumerate(
                            range(cfg.FPN.RPN_MIN_LEVEL,
                                  cfg.FPN.RPN_MAX_LEVEL + 1)):
                        return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] += loss_rpn_cls[k]
                        return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] += loss_rpn_bbox[k]
                else:
                    return_dict['losses']['loss_rpn_cls'] += loss_rpn_cls
                    return_dict['losses']['loss_rpn_bbox'] += loss_rpn_bbox
                # bbox loss for stage i, accumulated onto the stage-0 totals.
                loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                    cls_score, bbox_pred, new_rpn_ret['labels_int32'],
                    new_rpn_ret['bbox_targets'],
                    new_rpn_ret['bbox_inside_weights'],
                    new_rpn_ret['bbox_outside_weights'], stage=i)
                return_dict['losses']['loss_cls'] += loss_cls
                return_dict['losses']['loss_bbox'] += loss_bbox
                return_dict['metrics']['accuracy_cls'] += accuracy_cls
            # Record this stage's outputs; detach so the returned tensors do
            # not carry this stage's graph into later consumers.
            return_dict['rois' + '_{}'.format(i)] = deepcopy(
                new_rpn_ret['rois'])
            return_dict['cls_score' + '_{}'.format(i)] = cls_score.detach()
            return_dict['bbox_pred' + '_{}'.format(i)] = bbox_pred.detach()
            # Next stage builds on this stage's blobs.
            rpn_ret = new_rpn_ret.copy()
    return return_dict