def im_detect(predictor, data_batch, data_names, scales, cfg): output_all = predictor.predict(data_batch) data_dict_all = [ dict(zip(data_names, data_batch.data[i])) for i in range(len(data_batch.data)) ] scores_all = [] pred_boxes_all = [] for output, data_dict, scale in zip(output_all, data_dict_all, scales): if cfg.TEST.HAS_RPN: rois = output['rois_output'].asnumpy()[:, 1:] else: rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:] im_shape = data_dict['data'].shape # save output scores = output['cls_prob_reshape_output'].asnumpy()[0] bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] # post processing pred_boxes = bbox_pred(rois, bbox_deltas) pred_boxes = clip_boxes(pred_boxes, im_shape[-2:]) # we used scaled image & roi to train, so it is necessary to transform them back pred_boxes = pred_boxes / scale scores_all.append(scores) pred_boxes_all.append(pred_boxes) if 'feat_conv_3x3_relu_output' in output_all[0]: feat = output_all[0]['feat_conv_3x3_relu_output'] else: feat = None return scores_all, pred_boxes_all, data_dict_all, feat
def im_batch_detect(predictor, data_batch, data_names, scales, cfg): output_all = predictor.predict(data_batch) data_dict_all = [ dict(zip(data_names, data_batch.data[i])) for i in range(len(data_batch.data)) ] scores_all = [] pred_boxes_all = [] for output, data_dict, scale in zip(output_all, data_dict_all, scales): im_infos = data_dict['im_info'].asnumpy() # save output scores = output['cls_prob_reshape_output'].asnumpy()[0] bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] rois = output['rois_output'].asnumpy() for im_idx in range(im_infos.shape[0]): bb_idxs = np.where(rois[:, 0] == im_idx)[0] im_shape = im_infos[im_idx, :2].astype(np.int) # post processing pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :]) pred_boxes = clip_boxes(pred_boxes, im_shape) # we used scaled image & roi to train, so it is necessary to transform them back pred_boxes = pred_boxes / scale[im_idx] scores_all.append(scores[bb_idxs, :]) pred_boxes_all.append(pred_boxes) return scores_all, pred_boxes_all, data_dict_all
def forward(self, is_train, req, in_data, out_data, aux): nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id) batch_size = in_data[0].shape[0] if batch_size > 1: raise ValueError( "Sorry, multiple images each device is not implemented") # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) pre_nms_topN = self._rpn_pre_nms_top_n post_nms_topN = self._rpn_post_nms_top_n min_size = self._rpn_min_size # the first set of anchors are background probabilities # keep the second part scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :] bbox_deltas = in_data[1].asnumpy() im_info = in_data[2].asnumpy()[0, :] if DEBUG: print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox_deltas and shifted anchors # use real image size instead of padded feature map sizes height, width = int(im_info[0] / self._feat_stride), int( im_info[1] / self._feat_stride) if DEBUG: print('score map size: {}'.format(scores.shape)) print("resudial: {}".format( (scores.shape[2] - height, scores.shape[3] - width))) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride shift_y = np.arange(0, height) * self._feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = self._clip_pad(scores, (height, width)) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_pred(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = self._filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) det = np.hstack((proposals, scores)).astype(np.float32) keep = nms(det) if post_nms_topN > 0: keep = keep[:post_nms_topN] # pad to ensure output size remains unchanged if len(keep) < post_nms_topN: pad = npr.choice(keep, size=post_nms_topN - len(keep)) keep = np.hstack((keep, pad)) proposals = proposals[keep, :] scores = scores[keep] # Output rois array # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) self.assign(out_data[0], req[0], blob) if self._output_score: self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))