def __init__(self, feat_stride, scales, ratios, output_score,
             rpn_pre_nms_top_n, rpn_post_nms_top_n, threshold, rpn_min_size):
    """Configure the RPN proposal operator.

    feat_stride : int, total downsampling stride of the backbone feature map;
        also used as the anchor base size.
    scales : str, bracketed comma-separated anchor scales, e.g. '(8,16,32)'.
    ratios : str, bracketed comma-separated aspect ratios, e.g. '(0.5,1,2)'.
    output_score : bool, whether to emit proposal scores as a second output.
    rpn_pre_nms_top_n : int, number of top-scoring proposals kept before NMS.
    rpn_post_nms_top_n : int, number of proposals kept after NMS.
    threshold : float, NMS IoU threshold.
    rpn_min_size : int, minimum box side length (original-image scale).
    """
    super(ProposalOperator, self).__init__()
    self._feat_stride = feat_stride
    # scales/ratios arrive as bracketed strings such as '(8,16,32)';
    # strip the brackets and parse the comma-separated values explicitly.
    # (np.fromstring(..., sep=',') is deprecated since NumPy 1.14.)
    self._scales = np.array([float(s) for s in scales[1:-1].split(',')])
    self._ratios = np.array([float(r) for r in ratios[1:-1].split(',')])
    self._anchors = generate_anchors(base_size=self._feat_stride,
                                     scales=self._scales,
                                     ratios=self._ratios)
    self._num_anchors = self._anchors.shape[0]
    self._output_score = output_score
    self._rpn_pre_nms_top_n = rpn_pre_nms_top_n
    self._rpn_post_nms_top_n = rpn_post_nms_top_n
    self._threshold = threshold
    self._rpn_min_size = rpn_min_size
    logger.debug('feat_stride: %s' % self._feat_stride)
    logger.debug('anchors:\n%s' % self._anchors)
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoI proposals for a 3DCE multi-slice batch.

    The batch holds groups of ``config.NUM_IMAGES_3DCE`` adjacent slices;
    only the central (key) slice of each group produces proposals.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, output assignment mode.
    in_data : [rpn_cls_prob (B, 2A, H, W), rpn_bbox_pred (B, 4A, H, W),
               im_info (B, 3)].
    out_data : [rois (R, 5)] plus [scores (R, 1)] if output_score is set.
    aux : list of NDArray, unused.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]

    # Algorithm, per key slice:
    #   for each (H, W) location i, take the A anchors centered on cell i
    #   apply the predicted bbox deltas to each anchor
    #   clip boxes to the image, drop boxes smaller than min_size
    #   sort (proposal, score) pairs by score, keep top pre_nms_topN
    #   apply NMS, keep top post_nms_topN, return (RoIs, scores)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    blob_all = np.empty((0, 5))
    scores_all = np.empty((0, 1))
    num_image = config.NUM_IMAGES_3DCE
    # Index of the central (key) slice inside each num_image-sized group.
    # Integer division is required: '/' yields a float under Python 3 and
    # would break range() below.
    key_idx = (num_image - 1) // 2
    for i in range(key_idx, batch_size, num_image):
        # The first A channels are background probabilities; keep only the
        # foreground half.
        scores = in_data[0].asnumpy()[i:i + 1, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()[i:i + 1, :, :, :]
        im_info = in_data[2].asnumpy()[i, :]
        logger.debug('im_info: %s' % im_info)

        # 1. Generate proposals from bbox_deltas and shifted anchors.
        # Use the real image size instead of the padded feature-map size.
        height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)
        logger.debug('score map size: (%d, %d)' % (scores.shape[2], scores.shape[3]))
        logger.debug('resudial: (%d, %d)' % (scores.shape[2] - height, scores.shape[3] - width))

        # Enumerate all shifts (one per feature-map cell, in image pixels).
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get (K, A, 4),
        # then reshape to (K*A, 4) shifted anchors.
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to match the
        # anchor ordering: (1, 4A, H, W) -> (1, H, W, 4A) -> (H*W*A, 4),
        # rows ordered by (h, w, a), slowest to fastest.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores: (1, A, H, W) -> (H*W*A, 1).
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations.
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. Clip predicted boxes to the image.
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. Remove predicted boxes with either height or width < threshold.
        # (NOTE: convert min_size to input image scale stored in im_info[2].)
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. Sort all (proposal, score) pairs by score, highest first.
        # 5. Take top pre_nms_topN (e.g. 6000).
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. Apply NMS (e.g. threshold = 0.7).
        # 7. Take after_nms_topN (e.g. 300).
        # 8. Return the top proposals (-> RoIs top).
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
            # Pad by resampling kept indices so the output size is fixed.
            if len(keep) < post_nms_topN:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
                keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output RoIs array; the batch index is the key-slice group number.
        # Integer division again: a float batch index would corrupt the blob.
        batch_inds = (i - key_idx) // num_image
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) + batch_inds
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        blob_all = np.vstack([blob_all, blob])
        scores_all = np.vstack([scores_all, scores])

    self.assign(out_data[0], req[0], blob_all)
    if self._output_score:
        self.assign(out_data[1], req[1], scores_all.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Implements forward computation.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data.
        'null' means skip assignment, etc.
    in_data : list of NDArray, input data.
    out_data : list of NDArray, pre-allocated output buffers.
    aux : list of NDArray, mutable auxiliary states. Usually not used.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part (foreground scores)
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]
    logger.debug('im_info: %s' % im_info)

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # use real image size instead of padded feature map sizes
    height, width = int(im_info[0] / self._feat_stride), int(
        im_info[1] / self._feat_stride)
    logger.debug('score map size: (%d, %d)' % (scores.shape[2], scores.shape[3]))
    logger.debug('resudial: (%d, %d)' % (scores.shape[2] - height, scores.shape[3] - width))

    # Enumerate all shifts
    # The idea: generate a grid of shifts, then add each shift to the A base
    # anchors, producing the A boxes at every feature-map position.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    # meshgrid builds matrices with x varying along rows and y along columns:
    # shift_x = [[  0  16  32 ... 592]      shift_y = [[  0   0   0 ...   0]
    #            [  0  16  32 ... 592]                 [ 16  16  16 ...  16]
    #             ...                                   ...
    #            [  0  16  32 ... 592]]                [592 592 592 ... 592]]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    # each base anchor is added to every shift: K positions x A boxes each
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    # (apply the regressed offsets to refine the anchor positions)
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    # (trim the parts of boxes that fall outside the image boundary)
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        # (if not enough survive NMS, randomly resample kept indices)
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array, fed to Fast R-CNN training.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # form the 5-tuple (0, x1, y1, x2, y2)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Sample RoIs and build classification/regression targets for Fast R-CNN.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, output assignment mode.
    in_data : [all_rois (N, 5), gt_boxes (G, 5), gt_keypoints].
    out_data : [rois, labels, bbox_targets, bbox_weights, keypoints_label].
    aux : list of NDArray, unused.
    """
    assert self._batch_rois % self._batch_images == 0, \
        'BATCHIMAGES {} must devide BATCH_ROIS {}'.format(self._batch_images, self._batch_rois)
    # Integer division: '/' would make this a float under Python 3 and
    # propagate a non-integral count into sample_rois.
    rois_per_image = self._batch_rois // self._batch_images
    fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int)

    all_rois = in_data[0].asnumpy()
    gt_boxes = in_data[1].asnumpy()
    gt_keypoints = in_data[2].asnumpy()

    # Include ground-truth boxes in the set of candidate rois
    # (prepend a batch-index column of zeros; drop the gt class column).
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
    # Sanity check: single batch only
    assert np.all(
        all_rois[:, 0] == 0), 'Only single item batches are supported'

    rois, labels, bbox_targets, bbox_weights, keypoints_label = \
        sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes,
                    gt_boxes=gt_boxes, gt_keypoints=gt_keypoints)

    if logger.level == logging.DEBUG:
        logger.debug("labels: %s" % labels)
        logger.debug('num fg: {}'.format((labels > 0).sum()))
        logger.debug('num bg: {}'.format((labels == 0).sum()))
        self._count += 1
        self._fg_num += (labels > 0).sum()
        self._bg_num += (labels == 0).sum()
        logger.debug("self._count: %d" % self._count)
        logger.debug('num fg avg: %d' % (self._fg_num / self._count))
        logger.debug('num bg avg: %d' % (self._bg_num / self._count))
        # guard against ZeroDivisionError when no backgrounds sampled yet
        logger.debug('ratio: %.3f' % (float(self._fg_num) / float(max(self._bg_num, 1))))

    for ind, val in enumerate(
            [rois, labels, bbox_targets, bbox_weights, keypoints_label]):
        self.assign(out_data[ind], req[ind], val)