def im_detect(self, im):
    """Turn the network outputs cached on ``self`` into scores and boxes.

    Nothing is executed here: the method only reads ``self.cls_prob``,
    ``self.bbox_pred``, ``self.rois`` and ``self.im_scale`` and post-processes
    them.

    Args:
        im: image object; only ``im.shape`` is used (passed to ``clip_boxes``).

    Returns:
        tuple ``(scores, pred_boxes)`` where ``scores`` is ``self.cls_prob``
        reshaped to ``(R, N_CLASSES + 1)`` and ``pred_boxes`` is the output of
        ``bbox_transform_inv`` followed by ``clip_boxes``.
    """
    cls_prob, bbox_pred, rois = self.cls_prob, self.bbox_pred, self.rois

    # The RoIs may arrive wrapped in a tuple; the array is its first item.
    if isinstance(rois, tuple):
        rois = rois[0]

    n_cols = cfg.ZLRM.N_CLASSES + 1
    scores = np.reshape(cls_prob, [-1, n_cols])          # (R, C+1)
    box_deltas = np.reshape(bbox_pred, [-1, n_cols * 4])  # (R, (C+1)x4)

    # Columns 1..4 hold the box coordinates (column 0 is skipped); dividing by
    # im_scale presumably maps them back to the unscaled image -- confirm
    # against the code that fills self.rois.
    boxes = np.array(rois)[:, 1:5] / self.im_scale

    # Apply the bounding-box regression deltas, then clip to the image.
    regressed = bbox_transform_inv(boxes, box_deltas)
    return scores, clip_boxes(regressed, im.shape)
def im_detect(sess, net, im):
    """Run one forward pass of ``net`` on ``im`` and return scores and boxes.

    Args:
        sess: object whose ``run(fetches, feed_dict=...)`` evaluates the
            network outputs.
        net: network exposing the ``data`` and ``im_info`` feed entries and
            ``get_output(name)`` for ``'cls_prob'``, ``'ave_bbox_pred_rois'``
            and ``'rois'``.
        im: input image; handed to ``_get_blobs`` and its ``shape`` is used
            for clipping.

    Returns:
        tuple ``(scores, pred_boxes)``. ``scores`` is ``cls_prob`` reshaped to
        ``(R, N_CLASSES + 1)``. ``pred_boxes`` has ``(N_CLASSES + 1) * 4``
        columns: the regressed boxes when ``cfg.ZLRM.TEST.BBOX_REG`` is set,
        otherwise the raw RoIs repeated once per class.

    Raises:
        AssertionError: if ``_get_blobs`` returns more than one image scale.
    """
    blobs, im_scales = _get_blobs(im)

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    # Forward pass.
    feed_dict = {net.data: blobs['data'], net.im_info: blobs['im_info']}
    cls_prob, bbox_pred, rois = sess.run(
        [net.get_output('cls_prob'),
         net.get_output('ave_bbox_pred_rois'),
         net.get_output('rois')],
        feed_dict=feed_dict)

    # The RoIs may arrive wrapped in a tuple; the array is its first item.
    if isinstance(rois, tuple):
        rois = rois[0]

    cls_prob = np.reshape(cls_prob, [-1, cfg.ZLRM.N_CLASSES + 1])  # (R, C+1)
    bbox_pred = np.reshape(
        bbox_pred, [-1, (cfg.ZLRM.N_CLASSES + 1) * 4])  # (R, (C+1)x4)
    rois = np.array(rois)

    assert len(im_scales) == 1, "Only single-image batch implemented"
    # Columns 1..4 are the box coordinates; undo the input scaling.
    boxes = rois[:, 1:5] / im_scales[0]
    scores = cls_prob

    if cfg.ZLRM.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    else:
        # Regression disabled: previously pred_boxes was never assigned on
        # this path and the return raised UnboundLocalError. Repeat each RoI
        # once per class so the result keeps the (R, (C+1)x4) layout.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes
def _process_boxes_scores(cls_prob, bbox_pred, rois, im_scale, im_shape):
    """Derive the final boxes and scores from the raw network outputs.

    Args:
        cls_prob: class scores; returned unchanged.
        bbox_pred: box regression deltas, one row per RoI.
        rois: RoI array whose columns 1..4 hold the box coordinates.
        im_scale: not used by this function.
            NOTE(review): the RoIs are not rescaled here -- confirm callers
            already pass them in image coordinates.
        im_shape: image shape forwarded to ``clip_boxes``.

    Returns:
        tuple ``(pred_boxes, scores)``; the boxes are always clipped.

    Raises:
        AssertionError: if ``rois`` and ``bbox_pred`` differ in row count.
    """
    assert rois.shape[0] == bbox_pred.shape[0], \
        'rois and bbox_pred must have the same shape'

    candidate_boxes = rois[:, 1:5]
    if cfg.ZLRM.TEST.BBOX_REG:
        # Refine the RoIs with the predicted deltas before clipping.
        candidate_boxes = bbox_transform_inv(candidate_boxes, deltas=bbox_pred)
    # Without regression the RoIs are clipped as they are (not tiled per class).
    return clip_boxes(candidate_boxes, im_shape), cls_prob
def proposal_layer(rpn_cls_prob_reshape_P2, rpn_bbox_pred_P2,
                   rpn_cls_prob_reshape_P3, rpn_bbox_pred_P3,
                   rpn_cls_prob_reshape_P4, rpn_bbox_pred_P4,
                   rpn_cls_prob_reshape_P5, rpn_bbox_pred_P5,
                   rpn_cls_prob_reshape_P6, rpn_bbox_pred_P6,
                   im_info, cfg_train_key=True,
                   _feat_strides=cfg.ZLRM.FPN_FEAT_STRIDE[2:],
                   anchor_sizes=cfg.ZLRM.FPN_ANCHOR_SIZE[2:]):
    """Turn the RPN outputs of five pyramid levels (P2..P6) into RoIs.

    Algorithm:
        1. generate the anchors of every cell on every level and apply the
           predicted deltas to them
        2. clip the predicted boxes to the image
        3. drop boxes whose height or width is below the size threshold
        4. sort the remaining (proposal, score) pairs by descending score
        5. keep the top ``pre_nms_topN`` of them
        6. apply NMS with ``nms_thresh``
        7. keep the top ``post_nms_topN`` survivors
        8. (test mode only) distribute the RoIs over the pyramid levels

    Parameters
    ----------
    rpn_cls_prob_reshape_P<k>: (1, H(k), W(k), A(k)x2) bg/fg probabilities
        produced by the RPN on pyramid level k
    rpn_bbox_pred_P<k>: (1, H(k), W(k), A(k)x4) box regression output of the
        RPN on pyramid level k
    im_info: sequence whose first item is
        [image_height, image_width, scale_ratio]
    cfg_train_key: ``True`` selects the ``cfg.ZLRM.TRAIN`` proposal settings,
        anything else selects ``cfg.ZLRM.TEST``; the per-level split of the
        RoIs is returned only when the value compares equal to ``False``
    _feat_strides: downsampling ratio of each pyramid level with respect to
        the input image
    anchor_sizes: absolute anchor size used on each pyramid level

    Returns
    -------
    When ``cfg_train_key == False``: a 6-tuple
        ``(rois_P2, rois_P3, rois_P4, rois_P5, rois_P6, rpn_rois)`` where
        ``rpn_rois`` is the concatenation of the five per-level arrays.
    Otherwise: ``rpn_rois`` only, rows ``[0, x1, y1, x2, y2]`` (float32).
    """
    num_levels = 5  # P2 .. P6

    # One anchor scale per level: absolute size relative to the level stride.
    anchor_scales = np.array(anchor_sizes) / np.array(_feat_strides)
    _anchors = [
        generate_anchors(base_size=_feat_strides[lvl],
                         ratios=cfg.ZLRM.ANCHOR_RATIO,
                         scales=np.array([anchor_scales[lvl]]))
        for lvl in range(num_levels)
    ]
    _num_anchors = [anchor.shape[0] for anchor in _anchors]

    im_info = im_info[0]

    # The explicit comparisons with True / False are kept on purpose: callers
    # that pass a non-bool value must keep getting the TEST settings together
    # with the single-array return.
    if cfg_train_key == True:  # noqa: E712
        phase_cfg = cfg.ZLRM.TRAIN
    else:
        phase_cfg = cfg.ZLRM.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    rpn_cls_prob_reshapes = [rpn_cls_prob_reshape_P2, rpn_cls_prob_reshape_P3,
                             rpn_cls_prob_reshape_P4, rpn_cls_prob_reshape_P5,
                             rpn_cls_prob_reshape_P6]
    bbox_deltas = [rpn_bbox_pred_P2, rpn_bbox_pred_P3, rpn_bbox_pred_P4,
                   rpn_bbox_pred_P5, rpn_bbox_pred_P6]

    heights = [cls_prob.shape[1] for cls_prob in rpn_cls_prob_reshapes]
    widths = [cls_prob.shape[2] for cls_prob in rpn_cls_prob_reshapes]

    # The last axis pairs (bg, fg) per anchor; keep the fg probability and
    # flatten each level to (H * W * A, 1) with rows ordered by (h, w, a).
    scores = [
        np.reshape(
            np.reshape(cls_prob,
                       [1, height, width, num_anchor, 2])[:, :, :, :, 1],
            [-1, 1])
        for height, width, cls_prob, num_anchor
        in zip(heights, widths, rpn_cls_prob_reshapes, _num_anchors)
    ]
    scores = np.concatenate(scores, axis=0)

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    def gen_shift(height, width, _feat_stride):
        # Enumerate the (x, y, x, y) offset of every cell of one level.
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        return np.vstack((shift_x.ravel(), shift_y.ravel(),
                          shift_x.ravel(), shift_y.ravel())).transpose()

    shifts = [gen_shift(height, width, _feat_stride)
              for height, width, _feat_stride
              in zip(heights, widths, _feat_strides)]

    # Per level: add the A anchors (1, A, 4) to the K cell shifts (K, 1, 4)
    # to get (K, A, 4) shifted anchors, flattened to (K * A, 4).
    As = _num_anchors
    Ks = [shift.shape[0] for shift in shifts]
    anchors = [
        (_anchor.reshape((1, A, 4)) +
         shift.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
        for A, K, _anchor, shift in zip(As, Ks, _anchors, shifts)
    ]
    anchors = np.concatenate(anchors, axis=0)

    # Flatten the deltas to (H * W * A, 4) per level so that their rows line
    # up with the anchors, then stack the levels in the same order.
    bbox_deltas = np.concatenate(
        [bbox_delta.reshape((-1, 4)) for bbox_delta in bbox_deltas], axis=0)

    # Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove predicted boxes with either height or width < threshold
    #    (min_size is converted to the input image scale held in im_info[2]).
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. Sort all (proposal, score) pairs by score from highest to lowest.
    # 5. Take the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. Apply NMS.
    # 7. Take the top post_nms_topN.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob. Only a single input image is supported, so all batch
    # indices are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rpn_rois = np.hstack((batch_inds,
                          proposals.astype(np.float32, copy=False)))

    if cfg_train_key == False:  # noqa: E712
        # 8. Assign every RoI to a pyramid level Pk (P2 .. P6).
        def calc_level(width, height):
            area = width * height
            if area <= 0:
                # log2(0) is -inf and int(-inf) raises OverflowError; a
                # degenerate RoI belongs to the lowest level after clamping.
                return 2
            return min(6, max(2, int(4 + np.log2(np.sqrt(area) / 224))))

        leveled_idxs = [[] for _ in range(num_levels)]
        for idx, roi in enumerate(rpn_rois):
            # roi: [0, x0, y0, x1, y1]
            level_idx = calc_level(roi[3] - roi[1], roi[4] - roi[2]) - 2
            leveled_idxs[level_idx].append(idx)

        leveled_rois = [rpn_rois[idxs] for idxs in leveled_idxs]
        rpn_rois = np.concatenate(leveled_rois, axis=0)

        return (leveled_rois[0], leveled_rois[1], leveled_rois[2],
                leveled_rois[3], leveled_rois[4], rpn_rois)

    return rpn_rois
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key=True,
                   _feat_stride=cfg.ZLRM.RESNET_50_FEAT_STRIDE,
                   anchor_scales=cfg.ZLRM.ANCHOR_SCALE):
    """Turn single-level RPN outputs into scored region proposals.

    Steps: build the shifted anchors, apply the predicted deltas, clip to the
    image, drop boxes below the size threshold, keep the best
    ``pre_nms_topN`` by score, run NMS and keep the best ``post_nms_topN``.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1, H, W, Ax2) bg/fg probabilities from the RPN;
        the last axis is read as A (bg, fg) pairs
    rpn_bbox_pred: (1, H, W, Ax4) box regression output of the RPN
    im_info: sequence whose first item is
        [image_height, image_width, scale_ratio]
    cfg_key: a value equal to ``True`` selects the ``cfg.ZLRM.TRAIN`` proposal
        settings, anything else selects ``cfg.ZLRM.TEST``
    _feat_stride: downsampling ratio of the feature map with respect to the
        input image; used for the cell shifts
    anchor_scales: scales handed to ``generate_anchors``

    Returns
    -------
    (blob, scores): ``blob`` holds one float32 row ``[0, x1, y1, x2, y2]`` per
    kept proposal and ``scores`` the matching (N, 1) foreground scores.
    """
    # NOTE(review): generate_anchors is called without a base size, so
    # _feat_stride only affects the shifts below -- confirm this is intended.
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    n_anchors = base_anchors.shape[0]

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg.ZLRM.TRAIN if cfg_key == True else cfg.ZLRM.TEST  # noqa: E712
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # Pick the fg probability of every (bg, fg) pair -> (1, H, W, A).
    paired = np.reshape(rpn_cls_prob_reshape, [1, height, width, n_anchors, 2])
    scores = np.reshape(paired[:, :, :, :, 1], [1, height, width, n_anchors])

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    # One (x, y, x, y) offset per feature-map cell, row-major over (h, w).
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).T

    # Broadcast the A anchors (1, A, 4) against the K shifts (K, 1, 4) and
    # flatten the result to (K * A, 4).
    n_cells = shifts.shape[0]
    anchors = base_anchors.reshape((1, n_anchors, 4)) + shifts[:, np.newaxis, :]
    anchors = anchors.reshape((n_cells * n_anchors, 4))

    # Rows of the deltas and of the scores follow the same (h, w, a) order as
    # the anchors.
    deltas = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove boxes with either height or width below the threshold
    #    (min_size is converted to the input image scale held in im_info[2]).
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals, scores = proposals[big_enough, :], scores[big_enough]

    # 4./5. Highest scores first, truncated to pre_nms_topN when positive.
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals, scores = proposals[ranking, :], scores[ranking]

    # 6./7. NMS, then truncate to post_nms_topN when positive.
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals, scores = proposals[survivors, :], scores[survivors]

    # 8. Output rois blob. Only a single input image is supported, so the
    #    batch index column is all zeros.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores