def im_detect(predictor, data_batch, data_names, scale):
    """Run one detection forward pass and print how long each stage took.

    :param predictor: object whose ``predict(data_batch)`` returns a mapping
        from output name to an array exposing ``asnumpy()``
    :param data_batch: batch whose ``data`` entries line up with ``data_names``
    :param data_names: names used to key the entries of ``data_batch.data``
    :param scale: factor the input image was resized by; predicted boxes are
        divided by it before being returned
    :return: ``(scores, pred_boxes, data_dict)``
    """
    st = time.time()
    output = predictor.predict(data_batch)
    et = time.time()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('predict{:.4f}s'.format(et - st))

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        # Column 0 is dropped; only the last four columns are used as boxes.
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform
    # them back
    pred_boxes = pred_boxes / scale

    et = time.time()
    # Measured from the same start time, so this includes the predict call.
    print('im_detect{:.4f}s'.format(et - st))
    return scores, pred_boxes, data_dict
def im_detect(predictor, data_batch, data_names, scale):
    """Detect boxes and, when ``config.HAS_PART`` is set, head boxes and joints.

    :param predictor: object whose ``predict(data_batch)`` returns a mapping
        from output name to an array exposing ``asnumpy()``
    :param data_batch: batch whose ``data`` entries line up with ``data_names``
    :param data_names: names used to key the entries of ``data_batch.data``
    :param scale: factor the input image was resized by; all returned
        coordinates are divided by it
    :return: ``(scores, pred_boxes, head_boxes, joints, data_dict)`` when
        ``config.HAS_PART`` is truthy, otherwise
        ``(scores, pred_boxes, data_dict)``
    """
    output = predictor.predict(data_batch)

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        # Column 0 is dropped; only the last four columns are used as boxes.
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform
    # them back
    pred_boxes = pred_boxes / scale

    if not config.HAS_PART:
        return scores, pred_boxes, data_dict

    head_scores = output['head_prob_reshape_output'].asnumpy()[0]
    head_gids = np.argmax(head_scores, axis=1)
    head_deltas = output['head_pred_reshape_output'].asnumpy()[0]
    # Deltas are rescaled by the training stds only; no mean is added back.
    stds = np.reshape(np.array(config.TRAIN.BBOX_STDS), (-1, 4))
    num_grids = head_scores.shape[1]
    head_deltas *= np.tile(stds, (1, num_grids))
    head_boxes = pred_head(rois, head_deltas, head_gids, config.PART_GRID_HW)
    head_boxes /= scale

    # Four joint heads, named joint_prob0..3 / joint_pred0..3.
    joints_scores = [
        output['joint_prob{}_reshape_output'.format(i)].asnumpy()[0]
        for i in range(4)
    ]
    joints_gids = [np.argmax(j, axis=1) for j in joints_scores]
    joints_deltas = [
        output['joint_pred{}_reshape_output'.format(i)].asnumpy()[0]
        for i in range(4)
    ]
    # Joints use only the first two std columns, tiled with the head grid
    # count (as in the original code).
    joint_stds = np.tile(stds[:, :2], (1, num_grids))
    joints_deltas = [j * joint_stds for j in joints_deltas]
    joints = [
        pred_joint(rois, jd, jid, config.PART_GRID_HW)
        for jd, jid in zip(joints_deltas, joints_gids)
    ]
    joints = np.hstack(joints)
    joints /= scale
    return scores, pred_boxes, head_boxes, joints, data_dict
def im_detect(self, im_array, im_info=None, roi_array=None):
    """
    Bind the symbol to the given inputs, run one forward pass and decode
    the predicted boxes. Inputs must follow minibatch.get_testbatch format.
    :param im_array: numpy.ndarray [b c h w]
    :param im_info: numpy.ndarray [b 3], used when config.TEST.HAS_RPN
    :param roi_array: numpy.ndarray [roi_num 5], used otherwise
    :return: scores, pred_boxes
    """
    use_rpn = config.TEST.HAS_RPN

    # The second network input depends on whether the RPN supplies the rois.
    if use_rpn:
        extra_name, extra_value = 'im_info', im_info
    else:
        extra_name, extra_value = 'rois', roi_array

    # fill in data
    self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
    self.arg_params[extra_name] = mx.nd.array(extra_value, self.ctx)
    input_shapes = {
        'data': self.arg_params['data'].shape,
        extra_name: self.arg_params[extra_name].shape,
    }
    arg_shapes, _, _ = self.symbol.infer_shape(**input_shapes)

    # fill in label: a zero array of the inferred shape
    shape_of = dict(zip(self.symbol.list_arguments(), arg_shapes))
    self.arg_params['cls_prob_label'] = mx.nd.zeros(
        shape_of['cls_prob_label'], self.ctx)

    # execute
    self.executor = self.symbol.bind(
        self.ctx,
        self.arg_params,
        args_grad=None,
        grad_req='null',
        aux_states=self.aux_params)
    outputs = dict(zip(self.symbol.list_outputs(), self.executor.outputs))
    self.executor.forward(is_train=False)

    # save output
    scores = outputs['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = outputs['bbox_pred_reshape_output'].asnumpy()[0]
    if use_rpn:
        rois = outputs['rois_output'].asnumpy()[:, 1:]
    else:
        rois = roi_array[:, 1:]

    # post processing: decode, then clip to the last two dims of the image
    pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas),
                            im_array[0].shape[-2:])
    return scores, pred_boxes
def im_detect(predictor, data_batch, data_names):
    """Run the predictor on one batch and decode its box predictions.

    :param predictor: object whose ``predict(data_batch)`` returns a mapping
        from output name to an array exposing ``asnumpy()``
    :param data_batch: batch whose ``data`` entries line up with ``data_names``
    :param data_names: names used to key the entries of ``data_batch.data``
    :return: ``(scores, pred_boxes, data_dict)``; boxes are clipped to the
        last two dims of the ``data`` input and are not rescaled
    """
    net_out = predictor.predict(data_batch)
    inputs = dict(zip(data_names, data_batch.data))

    # Rois come from the network when the RPN is on, from the inputs otherwise.
    roi_source = net_out['rois_output'] if config.TEST.HAS_RPN else inputs['rois']
    # Column 0 is dropped; the remaining columns are the box coordinates.
    proposals = roi_source.asnumpy()[:, 1:]

    cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
    deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

    input_hw = inputs['data'].shape[-2:]
    boxes = clip_boxes(bbox_pred(proposals, deltas), input_hw)

    return cls_scores, boxes, inputs
def im_detect(predictor, data_batch, data_names, scale):
    """Predict class scores and boxes, with boxes in original-image units.

    :param predictor: object whose ``predict(data_batch)`` returns a mapping
        from output name to an array exposing ``asnumpy()``
    :param data_batch: batch whose ``data`` entries line up with ``data_names``
    :param data_names: names used to key the entries of ``data_batch.data``
    :param scale: resize factor that was applied to the image; the clipped
        boxes are divided by it
    :return: ``(scores, pred_boxes, data_dict)``
    """
    result = predictor.predict(data_batch)
    named_inputs = dict(zip(data_names, data_batch.data))

    if config.TEST.HAS_RPN:
        roi_table = result['rois_output'].asnumpy()
    else:
        # Externally supplied rois are flattened to rows of five values.
        roi_table = named_inputs['rois'].asnumpy().reshape((-1, 5))
    # Drop column 0 and keep the four coordinate columns.
    roi_boxes = roi_table[:, 1:]

    class_probs = result['cls_prob_reshape_output'].asnumpy()[0]
    regress = result['bbox_pred_reshape_output'].asnumpy()[0]

    # Decode, clip against the network input size, then undo the resize.
    net_hw = named_inputs['data'].shape[-2:]
    clipped = clip_boxes(bbox_pred(roi_boxes, regress), net_hw)
    final_boxes = clipped / scale

    return class_probs, final_boxes, named_inputs
def im_detect(predictor, data_batch, data_names, scales):
    """Detect boxes and masks for every sub-batch in ``data_batch``.

    :param predictor: object whose ``predict(data_batch)`` returns one output
        mapping per entry of ``data_batch.data``
    :param data_batch: batch whose ``data`` is a sequence of per-sub-batch
        data lists, each lining up with ``data_names``
    :param data_names: names used to key each sub-batch's data
    :param scales: one resize factor per sub-batch; rois and boxes are
        divided by it
    :return: ``(scores_all, rois_all, pred_boxes_all, pred_masks_all,
        data_dict_all)``, each a list with one entry per sub-batch
    :raises NotImplementedError: when ``config.TEST.HAS_RPN`` is falsy
    """
    output_all = predictor.predict(data_batch)

    # Iterate the sub-batches directly; this avoids the Python-2-only xrange.
    data_dict_all = [dict(zip(data_names, data)) for data in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    pred_masks_all = []
    rois_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if config.TEST.HAS_RPN:
            # Column 0 is dropped; the remaining columns are the boxes.
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            raise NotImplementedError(
                'im_detect requires config.TEST.HAS_RPN')
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        pred_masks = output['mask_pred_output'].asnumpy()

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        rois = rois / scale
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        rois_all.append(rois)
        pred_boxes_all.append(pred_boxes)
        pred_masks_all.append(pred_masks)

    return scores_all, rois_all, pred_boxes_all, pred_masks_all, data_dict_all
def forward(self, is_train, req, in_data, out_data, aux):
    """Turn RPN scores and box deltas into a fixed-size set of RoIs.

    Steps: shift the base anchors over the feature map, apply the predicted
    deltas, clip to the image, drop boxes that are too small, keep the
    highest-scoring ones, run NMS, then pad the survivors back up to the
    requested count.

    :param is_train: not used by this method
    :param req: per-output write requests, forwarded to ``self.assign``
    :param in_data: ``in_data[0]`` class probabilities, ``in_data[1]`` box
        deltas, ``in_data[2]`` image info (height, width, scale)
    :param out_data: ``out_data[0]`` receives the rois; ``out_data[1]``
        receives the scores when ``self._output_score`` is set
    :param aux: not used by this method
    :raises ValueError: when the batch holds more than one image
    """
    suppress = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    top_before_nms = self._rpn_pre_nms_top_n
    top_after_nms = self._rpn_post_nms_top_n
    smallest_side = self._rpn_min_size
    stride = self._feat_stride
    num_anchors = self._num_anchors

    # The first block of anchor channels holds background probabilities;
    # only the second block is kept for ranking.
    fg_scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
    deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Build proposals from the deltas and the shifted anchors, using the
    #    real image size rather than the padded feature-map size.
    height = int(im_info[0] / stride)
    width = int(im_info[1] / stride)
    if DEBUG:
        print('score map size: {}'.format(fg_scores.shape))
        print("resudial: {}".format((fg_scores.shape[2] - height, fg_scores.shape[3] - width)))

    # One (x, y, x, y) offset row per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                 np.arange(0, height) * stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # (1, A, 4) anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K * A, 4)
    num_cells = shifts.shape[0]
    anchors = (self._anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Crop both maps to (height, width) and flatten them in (h, w, a)
    # order so their rows line up with the anchors.
    crop = (height, width)
    deltas = self._clip_pad(deltas, crop).transpose((0, 2, 3, 1)).reshape((-1, 4))
    fg_scores = self._clip_pad(fg_scores, crop).transpose((0, 2, 3, 1)).reshape((-1, 1))

    # 2. Decode the boxes and clip them to the image.
    boxes = clip_boxes(bbox_pred(anchors, deltas), im_info[:2])

    # 3. Drop boxes below the minimum size, expressed at the input image
    #    scale stored in im_info[2].
    big_enough = self._filter_boxes(boxes, smallest_side * im_info[2])
    boxes = boxes[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # 4-5. Rank by score, highest first, and keep the top entries.
    ranking = fg_scores.ravel().argsort()[::-1]
    if top_before_nms > 0:
        ranking = ranking[:top_before_nms]
    boxes = boxes[ranking, :]
    fg_scores = fg_scores[ranking]

    # 6-8. NMS, truncate, then pad by resampling survivors so the output
    #      size stays constant.
    survivors = suppress(np.hstack((boxes, fg_scores)).astype(np.float32))
    if top_after_nms > 0:
        survivors = survivors[:top_after_nms]
        shortfall = top_after_nms - len(survivors)
        if shortfall > 0:
            filler = npr.choice(survivors, size=shortfall)
            survivors = np.hstack((survivors, filler))
    boxes = boxes[survivors, :]
    fg_scores = fg_scores[survivors]

    # Only one image per batch is supported, so every batch index is 0.
    batch_col = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    rois = np.hstack((batch_col, boxes.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], rois)

    if self._output_score:
        self.assign(out_data[1], req[1], fg_scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Implements forward computation.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, how to assign to
        out_data. 'null' means skip assignment, etc.
    in_data : list of NDArray, input data.
    out_data : list of NDArray, pre-allocated output buffers.
    aux : list of NDArray, mutable auxiliary states. Usually not used.

    Raises ValueError when the batch holds more than one image.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    # for each (H, W) location i of the feature map
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors,
    #   which yields the RoIs
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top) for the later
    # classification and regression stage

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    # Lazy %-args: the message is only formatted when DEBUG is enabled.
    logger.debug('im_info: %s', im_info)

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # use real image size instead of padded feature map sizes
    height, width = int(im_info[0] / self._feat_stride), int(
        im_info[1] / self._feat_stride)

    logger.debug('score map size: (%d, %d)',
                 scores.shape[2], scores.shape[3])
    logger.debug('resudial: (%d, %d)',
                 scores.shape[2] - height, scores.shape[3] - width)

    # Enumerate all shifts
    # The idea is to generate a grid of shifts and add every shift to each
    # of the A base anchors, giving A boxes at every feature-map position.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    # meshgrid repeats shift_x along the rows and shift_y along the columns,
    # e.g. for a stride of 16 and a 38x38 map:
    #   shift_x = [[0 16 32 ..., 592], [0 16 32 ..., 592], ...]
    #   shift_y = [[0 0 0 ..., 0], [16 16 16 ..., 16], ..., [592 ... 592]]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # After the transpose every row is one (x, y, x, y) shift.
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    # Every anchor in _anchors is added to every shift.
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    # K shifts, A boxes per shift.
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations, i.e. refine
    # the anchor positions with the regressed offsets.
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    # (cuts off the parts of a box that extend past the image border)
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)

    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged: when too few boxes
    # survive, randomly re-pick survivors to fill the gap.
    # NOTE(review): npr.choice raises ValueError if `keep` is empty, i.e.
    # when no proposal survives filtering and NMS.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array (consumed by the downstream Fast R-CNN stage)
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # Rows become 5-tuples (0, x1, y1, x2, y2).
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))