def demo(sess, net, image_name):
    """Run the detector on one image and visualise the detections per class.

    The image is loaded with ``readimage``, pushed through ``im_detect``
    (timed with ``Timer``), and for every non-background entry of ``CLASSES``
    the class-specific boxes are de-duplicated with ``nms`` and handed to
    ``vis`` together with the confidence threshold.

    Args:
        sess: session object forwarded unchanged to ``im_detect``.
        net: network object forwarded unchanged to ``im_detect``.
        image_name: identifier given to ``readimage`` and to ``vis``.
    """
    conf_thresh = 0.7
    nms_thresh = 0.3

    im = readimage(image_name)

    # Only the forward pass is timed.
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Entry 0 of CLASSES is skipped (background), so numbering starts at 1.
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):
        first_col = 4 * cls_ind
        last_col = 4 * (cls_ind + 1)
        score_column = scores[:, cls_ind][:, np.newaxis]
        dets = np.hstack(
            (boxes[:, first_col:last_col], score_column)).astype(np.float32)
        dets = dets[nms(dets, nms_thresh), :]
        vis(im, image_name, cls, dets, thresh=conf_thresh)
def detect(self, image):
    """Run detection on ``image`` and return NMS- and score-filtered boxes.

    The image is converted with ``image_transform_1_3`` and passed to
    ``self.im_detect``. For each non-background entry of
    ``self.classes_detect`` the class boxes and scores are stacked, reduced
    with ``nms`` and then filtered by confidence.

    Args:
        image: input image accepted by ``image_transform_1_3``.

    Returns:
        float32 array with one row per detection: the four box columns of the
        class followed by its score.
    """
    conf_thresh = 0.7
    nms_thresh = 0.1

    image = image_transform_1_3(image)

    # Only the forward pass is timed.
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Entry 0 is skipped (background), so class numbering starts at 1.
    for cls_ind, _cls in enumerate(self.classes_detect[1:], start=1):
        score_column = scores[:, cls_ind][:, np.newaxis]
        dets = np.hstack(
            (boxes[:, 4 * cls_ind:4 * (cls_ind + 1)], score_column)
        ).astype(np.float32)
        dets = dets[nms(dets, nms_thresh), :]
        confident = np.where(dets[:, -1] >= conf_thresh)[0]
        dets = dets[confident, :]

    # NOTE(review): the flattened source does not show whether this return
    # sits inside the loop; it is placed after it, so the detections of the
    # last class are returned. Identical for a single foreground class.
    return dets
def detect(self, image):
    """Run detection and return one filtered detection array per box group.

    ``self.im_detect`` is expected to return parallel sequences ``scores`` and
    ``boxes``; each pair is processed independently.

    Args:
        image: input image accepted by ``image_transform_1_3``.

    Returns:
        list with one array per entry of ``boxes``; each array holds the first
        five columns of the rows that survived ``nms`` and the score threshold.
    """
    conf_thresh = 0.3
    nms_thresh = 0.5

    image = image_transform_1_3(image)

    # Only the forward pass is timed.
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    print('rois--------------', scores)
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, len(boxes)))

    dets = []
    for i, group_boxes in enumerate(boxes):
        group_scores = scores[i]
        # NMS only sees the first four box columns plus the scores ...
        keep = nms(np.hstack([group_boxes[:, 0:4], group_scores]), nms_thresh)
        # ... but the kept rows are taken from the full-width stack.
        kept = np.hstack([group_boxes, group_scores])[keep, :]
        confident = np.where(kept[:, -1] >= conf_thresh)[0]
        dets.append(kept[confident, :][:, 0:5])
    return dets
def detect(self, image):
    """Detect objects of every foreground class and tag each box with its class.

    The image is converted with ``image_transform_1_3`` and passed to
    ``self.im_detect``. For each non-background entry of
    ``self.classes_detect`` the boxes are pre-filtered by that class's score,
    reduced with ``nms`` and labelled with the class index.

    Args:
        image: input image accepted by ``image_transform_1_3``.

    Returns:
        2-D array with one row per kept detection: the four box columns of the
        class followed by the class index (its position in
        ``self.classes_detect``). Has shape ``(0, 5)`` when there is no
        foreground class.
    """
    image = image_transform_1_3(image)

    # Only the forward pass is timed.
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    print('kkk', np.argmax(scores, axis=1))
    print('lll', scores[np.argmax(scores, axis=1) == 4, 4])
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.3
    NMS_THRESH = 0.5
    dets_list = []
    # Entry 0 is skipped (background), so class numbering starts at 1.
    for cls_ind, _cls in enumerate(self.classes_detect[1:], start=1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)

        # Pre-filter on THIS class's score column. The previous code read the
        # column before incrementing cls_ind, i.e. the preceding class's scores.
        candidates = np.where(cls_scores > CONF_THRESH)[0]
        dets = dets[candidates, :]

        # nms returns indices relative to the array it was given, so it must
        # index the same (pre-filtered) array rather than the full one.
        if dets.shape[0] > 0:
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        cls_ind_list = np.full((len(inds), 1), cls_ind, dtype=np.int32)
        dets_list.append(np.hstack((dets[inds, :-1], cls_ind_list)))

    # np.vstack rejects an empty list; return an empty result instead.
    dets = np.vstack(dets_list) if dets_list else np.empty((0, 5))
    print('jjj', dets)
    return dets
def conduct_nms(class_ids, refined_rois, class_scores, keep, config):
    """Apply per-class NMS to one sample and keep the top-scoring survivors.

    Operates on a single sample; there is no batch dimension.

    Args:
        class_ids: class id per roi, e.g. [1000].
        refined_rois: box per roi, e.g. [1000, 4].
        class_scores: score per roi, e.g. [1000].
        keep: mask selecting the rois that enter NMS, same length as the inputs.
        config: configuration; ``TEST.DET_NMS_THRESHOLD`` and
            ``TEST.DET_MAX_INSTANCES`` are read.

    Returns:
        Tuple ``(detections, final_index)``. ``detections`` has one row per
        kept roi laid out as (box columns, class_id, class_score);
        ``final_index`` holds the positions of those rois in the inputs.
    """
    # Positions, within the inputs, of the rois selected by ``keep``.
    _indx = torch.nonzero(keep).squeeze()
    pre_nms_class_ids = class_ids[keep]
    pre_nms_scores = class_scores[keep]
    pre_nms_rois = refined_rois[_indx, :]

    # NMS is run separately for every class present among the candidates.
    for i, class_id in enumerate(unique1d(pre_nms_class_ids)):
        ixs = torch.nonzero(class_id == pre_nms_class_ids).squeeze()

        # Order this class's candidates by descending score.
        ix_scores, order = pre_nms_scores[ixs].sort(descending=True)
        ix_rois = pre_nms_rois[ixs, :][order, :]

        nms_input = torch.cat(
            (ix_rois, ix_scores.unsqueeze(1)), dim=1).unsqueeze(0).data
        class_keep = nms(nms_input, config.TEST.DET_NMS_THRESHOLD)[0]

        # Translate: sorted position -> class subset -> candidates -> inputs.
        class_keep = _indx[ixs[order[class_keep.tolist()]]]

        if i == 0:
            nms_keep = class_keep
        else:
            nms_keep = unique1d(torch.cat((nms_keep, class_keep)))

    nms_indx = intersect1d(_indx, nms_keep)

    # Retain at most DET_MAX_INSTANCES survivors, best score first.
    roi_count = config.TEST.DET_MAX_INSTANCES
    _, ranking = class_scores[nms_indx].sort(descending=True)
    top_ids = ranking[:roi_count]
    final_index = nms_indx[top_ids].squeeze()

    detections = torch.cat(
        (refined_rois[final_index],
         class_ids[final_index].unsqueeze(1).float(),
         class_scores[final_index].unsqueeze(1)),
        dim=1)
    return detections, final_index
def detect(self, text_proposals, scores, size):
    """Filter text proposals and connect the survivors into text lines.

    Args:
        text_proposals: proposal boxes, one row per proposal.
        scores: proposal scores, row-aligned with ``text_proposals``
            (presumably shape (N, 1), since they are hstacked with the boxes).
        size: forwarded unchanged to ``text_proposal_connector.get_text_lines``.

    Returns:
        Tuple ``(text_proposals, scores, text_recs)``: the proposals and scores
        that survived NMS, and the text lines accepted by ``self.filter_boxes``.
    """
    min_score = cfg["TEXT"]["TEXT_PROPOSALS_MIN_SCORE"]
    nms_thresh = cfg["TEXT"]["TEXT_PROPOSALS_NMS_THRESH"]

    # Drop proposals whose score is not above the minimum.
    above_min = np.where(scores > min_score)[0]
    text_proposals = text_proposals[above_min]
    scores = scores[above_min]

    # Order by descending score.
    by_score = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[by_score]
    scores = scores[by_score]

    # Non-maximum suppression on (box, score) rows.
    survivors = nms(np.hstack((text_proposals, scores)), nms_thresh)
    text_proposals = text_proposals[survivors]
    scores = scores[survivors]

    # Build text lines, then keep only those accepted by filter_boxes.
    text_recs = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)
    accepted = self.filter_boxes(text_recs)
    return text_proposals, scores, text_recs[accepted]
def detect(sess, net, image):
    """Run detection on ``image`` and return NMS- and score-filtered boxes.

    The image is converted with ``image_transform_1_3`` and passed to
    ``im_detect``. For each non-background entry of ``CLASSES_DEFECT`` the
    class boxes and scores are stacked, reduced with ``nms`` and then filtered
    by confidence.

    Args:
        sess: session object forwarded unchanged to ``im_detect``.
        net: network object forwarded unchanged to ``im_detect``.
        image: input image accepted by ``image_transform_1_3``.

    Returns:
        float32 array with one row per detection: the four box columns of the
        class followed by its score.
    """
    conf_thresh = 0.7
    nms_thresh = 0.1

    image = image_transform_1_3(image)

    # Only the forward pass is timed.
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, image)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Entry 0 is skipped (background), so class numbering starts at 1.
    for cls_ind, _cls in enumerate(CLASSES_DEFECT[1:], start=1):
        score_column = scores[:, cls_ind][:, np.newaxis]
        dets = np.hstack(
            (boxes[:, 4 * cls_ind:4 * (cls_ind + 1)], score_column)
        ).astype(np.float32)
        dets = dets[nms(dets, nms_thresh), :]
        confident = np.where(dets[:, -1] >= conf_thresh)[0]
        dets = dets[confident, :]

    # NOTE(review): the flattened source does not show whether this return
    # sits inside the loop; it is placed after it, so the detections of the
    # last class are returned. Identical for a single foreground class.
    return dets
def proposal_layer(inputs, proposal_count, nms_threshold, priors, config=None):
    """Turn RPN outputs into a fixed-size set of normalized proposals.

    The highest-scoring anchors are refined with the predicted deltas, clipped
    to the image, reduced with non-max suppression and finally divided by the
    image size.

    Args:
        inputs: sequence of two tensors:
            [0] rpn_probs: [batch, anchors, (bg prob, fg prob)]
            [1] rpn_bbox:  [batch, anchors, (dy, dx, log(dh), log(dw))]
        proposal_count: maximum number of proposals kept per sample.
        nms_threshold: threshold handed to ``nms``.
        priors: anchors, [anchors, 4]; moved to the GPU here.
        config: configuration object. Although it defaults to ``None`` it is
            dereferenced unconditionally (``DATA.BBOX_STD_DEV``,
            ``RPN.PRE_NMS_LIMIT``, ``DATA.IMAGE_SHAPE``).

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)].
    """
    rpn_probs, rpn_bbox = inputs[0], inputs[1]

    anchors = Variable(priors.cuda(), requires_grad=False)
    bs, prior_num = rpn_probs.size(0), anchors.size(0)

    # Foreground confidence is used as the box score: [batch, anchors].
    scores = rpn_probs[:, :, 1]

    # Scale the predicted deltas by the configured standard deviation.
    std_dev = Variable(
        torch.from_numpy(
            np.reshape(config.DATA.BBOX_STD_DEV, [1, 1, 4])).float(),
        requires_grad=False).cuda()
    deltas = rpn_bbox * std_dev

    anchors = anchors.expand(bs, anchors.size(0), anchors.size(1))

    # Keep only the top-scoring anchors so the remaining work is cheaper.
    pre_nms_limit = min(config.RPN.PRE_NMS_LIMIT, prior_num)
    scores, order = scores.sort(descending=True)
    scores = scores[:, :pre_nms_limit]
    order = order[:, :pre_nms_limit]

    # Gather per sample; indexing the whole batch at once ran out of memory
    # according to the original author.
    deltas_trim = Variable(torch.FloatTensor(bs, pre_nms_limit, 4).cuda())
    anchors_trim = Variable(torch.FloatTensor(bs, pre_nms_limit, 4).cuda())
    for i in range(bs):
        top = order.data[i]
        deltas_trim[i] = deltas[i][top, :]
        anchors_trim[i] = anchors[i][top, :]

    # Refined anchors: [batch, N, (y1, x1, y2, x2)].
    # TODO (mid): nan or inf in initial iter
    boxes = apply_box_deltas(anchors_trim, deltas_trim)

    # Clip to the image boundaries.
    height, width = config.DATA.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    window = Variable(torch.from_numpy(window).cuda(), requires_grad=False)
    boxes = clip_boxes(boxes, window)

    # Small boxes are deliberately not filtered out (the original author cites
    # reduced accuracy on small objects).

    # Non-max suppression, then cap at proposal_count per sample.
    keep = nms(torch.cat((boxes, scores.unsqueeze(2)), 2).data, nms_threshold)
    keep = keep[:, :proposal_count]
    boxes_keep = Variable(torch.FloatTensor(bs, keep.shape[1], 4).cuda())
    for i in range(bs):
        boxes_keep[i] = boxes[i][keep[i], :]

    # Normalize coordinates to the range 0..1.
    norm = Variable(
        torch.from_numpy(np.array([height, width, height, width])).float(),
        requires_grad=False).cuda()
    return boxes_keep / norm
def proposal_layer(rpn_cls_prob_reshape_P2, rpn_bbox_pred_P2,
                   rpn_cls_prob_reshape_P3, rpn_bbox_pred_P3,
                   rpn_cls_prob_reshape_P4, rpn_bbox_pred_P4,
                   rpn_cls_prob_reshape_P5, rpn_bbox_pred_P5,
                   rpn_cls_prob_reshape_P6, rpn_bbox_pred_P6,
                   im_info, cfg_train_key=True,
                   _feat_strides=cfg.ZLRM.FPN_FEAT_STRIDE[2:],
                   anchor_sizes=cfg.ZLRM.FPN_ANCHOR_SIZE[2:]):
    """Generate RPN proposals from five pyramid levels (P2..P6).

    Steps: build anchors per level, apply the predicted deltas, clip to the
    image, drop boxes below the minimum size, keep the top ``pre_nms_topN`` by
    score, run NMS and keep the top ``post_nms_topN``.

    Parameters
    ----------
    rpn_cls_prob_reshape_P*: (1, H(P), W(P), A(P)x2) bg/fg probabilities of
        the RPN on pyramid level P.
    rpn_bbox_pred_P*: (1, H(P), W(P), A(P)x4) box regression output of the RPN
        on pyramid level P.
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratio].
    cfg_train_key: the ``cfg.ZLRM.TRAIN`` settings are used when this compares
        equal to ``True``, otherwise the ``cfg.ZLRM.TEST`` settings.
    _feat_strides: stride of each pyramid level relative to the input image.
    anchor_sizes: absolute anchor size on each pyramid level.

    Returns
    -------
    When ``cfg_train_key == False``: a 6-tuple with the rois assigned to each
    of the five levels followed by their concatenation. Otherwise only
    ``rpn_rois``. Rois are rows of [0, x1, y1, x2, y2].
    """
    num_levels = 5
    anchor_scales = np.array(anchor_sizes) / np.array(_feat_strides)

    # One anchor set per pyramid level, each with a single scale.
    _anchors = [
        generate_anchors(base_size=_feat_strides[lvl],
                         ratios=cfg.ZLRM.ANCHOR_RATIO,
                         scales=np.array([anchor_scales[lvl]]))
        for lvl in range(num_levels)
    ]
    _num_anchors = [level_anchors.shape[0] for level_anchors in _anchors]

    im_info = im_info[0]

    # Equality (not truthiness) is kept so non-bool arguments behave as before.
    phase_cfg = cfg.ZLRM.TRAIN if cfg_train_key == True else cfg.ZLRM.TEST  # noqa: E712
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    rpn_cls_prob_reshapes = [rpn_cls_prob_reshape_P2, rpn_cls_prob_reshape_P3,
                             rpn_cls_prob_reshape_P4, rpn_cls_prob_reshape_P5,
                             rpn_cls_prob_reshape_P6]
    bbox_deltas = [rpn_bbox_pred_P2, rpn_bbox_pred_P3, rpn_bbox_pred_P4,
                   rpn_bbox_pred_P5, rpn_bbox_pred_P6]

    heights = [prob.shape[1] for prob in rpn_cls_prob_reshapes]
    widths = [prob.shape[2] for prob in rpn_cls_prob_reshapes]

    # The last axis holds (bg, fg) per anchor; keep fg and flatten each level
    # to (H * W * A, 1), rows ordered by (h, w, a).
    scores = [
        np.reshape(
            np.reshape(prob, [1, height, width, num_anchor, 2])[:, :, :, :, 1],
            [-1, 1])
        for height, width, prob, num_anchor
        in zip(heights, widths, rpn_cls_prob_reshapes, _num_anchors)
    ]
    scores = np.concatenate(scores, axis=0)

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    def gen_shift(height, width, _feat_stride):
        """Return the (H * W, 4) offsets of every feature-map cell."""
        xs, ys = np.meshgrid(np.arange(0, width) * _feat_stride,
                             np.arange(0, height) * _feat_stride)
        return np.vstack(
            (xs.ravel(), ys.ravel(), xs.ravel(), ys.ravel())).transpose()

    shifts = [gen_shift(height, width, stride)
              for height, width, stride in zip(heights, widths, _feat_strides)]

    # Per level: (1, A, 4) anchors + (K, 1, 4) shifts -> (K * A, 4).
    As = _num_anchors
    Ks = [shift.shape[0] for shift in shifts]
    anchors = [
        (level_anchors.reshape((1, A, 4))
         + shift.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
        for A, K, level_anchors, shift in zip(As, Ks, _anchors, shifts)
    ]
    anchors = np.concatenate(anchors, axis=0)

    # Flatten the deltas of every level to rows of four, same order as anchors.
    bbox_deltas = np.concatenate(
        [delta.reshape((-1, 4)) for delta in bbox_deltas], axis=0)

    # 1. anchors + deltas -> proposals
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip to the image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. drop boxes below min_size, scaled by im_info[2]
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4./5. sort by descending score and keep the top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6./7. NMS, then keep the top post_nms_topN
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Single-image batches only, so every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rpn_rois = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))

    if cfg_train_key == False:  # noqa: E712 - equality kept deliberately
        # Assign every roi to a pyramid level P2..P6 based on its area.
        def calc_level(width, height):
            return min(6, max(2, int(
                4 + np.log2(np.sqrt(width * height) / 224))))

        leveled_idxs = [[] for _ in range(num_levels)]
        for idx, roi in enumerate(rpn_rois):
            # roi: [0, x0, y0, x1, y1]
            level_idx = calc_level(roi[3] - roi[1], roi[4] - roi[2]) - 2
            leveled_idxs[level_idx].append(idx)

        leveled_rois = [rpn_rois[idxs] for idxs in leveled_idxs]
        rpn_rois = np.concatenate(leveled_rois, axis=0)
        return (leveled_rois[0], leveled_rois[1], leveled_rois[2],
                leveled_rois[3], leveled_rois[4], rpn_rois)

    return rpn_rois
def proposal_layer(inputs, proposal_count, nms_threshold, anchors, config=None):
    """Select and refine RPN anchors into normalized second-stage proposals.

    Anchors are ranked by foreground score, refined with the predicted deltas,
    clipped to the image, reduced with non-max suppression and divided by the
    image size. Only batch size 1 is supported.

    Args:
        inputs: sequence of two tensors:
            [0] rpn_probs: [batch, anchors, (bg prob, fg prob)]
            [1] rpn_bbox:  [batch, anchors, (dy, dx, log(dh), log(dw))]
            The sequence itself is not modified.
        proposal_count: maximum number of proposals returned.
        nms_threshold: threshold handed to ``nms``.
        anchors: anchor boxes, [anchors, 4].
        config: configuration object. Although it defaults to ``None`` it is
            dereferenced unconditionally (``RPN.BBOX_STD_DEV``, ``GPU_COUNT``,
            ``TRAIN.IMAGE_SHAPE``).

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)].
    """
    # Drop the batch dimension on local tensors. The previous code wrote the
    # squeezed tensors back into ``inputs``, silently altering the caller's list.
    rpn_probs = inputs[0].squeeze(0)
    rpn_bbox = inputs[1].squeeze(0)

    # Foreground confidence is used as the box score.
    scores = rpn_probs[:, 1]

    # Scale the predicted deltas by the configured standard deviation.
    std_dev = Variable(
        torch.from_numpy(np.reshape(config.RPN.BBOX_STD_DEV, [1, 4])).float(),
        requires_grad=False)
    if config.GPU_COUNT:
        std_dev = std_dev.cuda()
    deltas = rpn_bbox * std_dev

    # Keep only the top-scoring anchors so the remaining work is cheaper.
    pre_nms_limit = min(6000, anchors.size()[0])
    scores, order = scores.sort(descending=True)
    order = order[:pre_nms_limit]
    scores = scores[:pre_nms_limit]
    deltas = deltas[order.data, :]  # TODO: Support batch size > 1 ff.
    anchors = anchors[order.data, :]

    # Refined anchors: [N, (y1, x1, y2, x2)].
    boxes = apply_box_deltas(anchors, deltas)

    # Clip to the image boundaries.
    height, width = config.TRAIN.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)

    # Small boxes are deliberately not filtered out (the original author cites
    # reduced accuracy on small objects).

    # Non-max suppression, then cap at proposal_count.
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    keep = keep[:proposal_count]
    boxes = boxes[keep, :]

    # Normalize coordinates to the range 0..1.
    norm = Variable(
        torch.from_numpy(np.array([height, width, height, width])).float(),
        requires_grad=False)
    if config.GPU_COUNT:
        norm = norm.cuda()
    normalized_boxes = boxes / norm

    # Restore the batch dimension.
    return normalized_boxes.unsqueeze(0)
im2show = np.copy(im) for j in range(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_classes):
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """Generate RPN proposals for a single image.

    Algorithm:
      for each (H, W) location i
        generate A anchor boxes centered on cell i
        apply predicted bbox deltas at cell i to each of the A anchors
      clip predicted boxes to image
      remove predicted boxes with either height or width < threshold
      sort all (proposal, score) pairs by score from highest to lowest
      take top pre_nms_topN proposals before NMS
      apply NMS to the remaining proposals
      take post_nms_topN proposals after NMS

    Args:
        rpn_bbox_cls_prob: RPN class probabilities, channels-last (n, H, W, 2A).
        rpn_bbox_pred: RPN box deltas, channels-last (n, H, W, 4A).
        im_dims: image dimensions handed to ``clip_boxes``.
        cfg_key: ``'TRAIN'`` selects ``cfg.TRAIN``; any other value selects
            ``cfg.TEST``.
        _feat_stride: stride of the feature map relative to the image.
        anchor_scales: scales forwarded to ``generate_anchors``.

    Returns:
        float32 array of shape (n_rois, 5): a zero batch index followed by the
        four proposal coordinates.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # Channels-last -> channels-first: (n, 2A, H, W) and (n, 4A, H, W).
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg.TRAIN if cfg_key == 'TRAIN' else cfg.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # The first A channels are bg probabilities, the remaining ones are fg.
    scores = rpn_bbox_cls_prob[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred

    # 1. Generate proposals from bbox deltas and shifted anchors.
    height, width = scores.shape[-2:]
    xs, ys = np.meshgrid(np.arange(0, width) * _feat_stride,
                         np.arange(0, height) * _feat_stride)
    shifts = np.vstack(
        (xs.ravel(), ys.ravel(), xs.ravel(), ys.ravel())).transpose()

    # (1, A, 4) anchors + (K, 1, 4) cell shifts -> (K * A, 4).
    A = _num_anchors
    K = shifts.shape[0]
    anchors = (_anchors.reshape((1, A, 4))
               + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    anchors = anchors.reshape((K * A, 4))

    # Back to channels-last so rows are ordered by (h, w, a) like the anchors:
    # deltas become (H * W * A, 4), scores become (H * W * A, 1).
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_dims)

    # 3. Remove boxes with either height or width below the threshold.
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4./5. Sort by descending score and keep the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6./7. NMS, then keep the top post_nms_topN.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Single-image batches only, so every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write( 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format( num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush()
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key=True,
                   _feat_stride=cfg.ZLRM.RESNET_50_FEAT_STRIDE,
                   anchor_scales=cfg.ZLRM.ANCHOR_SCALE):
    """Generate RPN proposals and their scores for a single image.

    Steps: build anchors for every feature-map cell, apply the predicted
    deltas, clip to the image, drop boxes below the minimum size, keep the top
    ``pre_nms_topN`` by score, run NMS and keep the top ``post_nms_topN``.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1, H, W, Ax2) bg/fg probabilities of the RPN, with
        the (bg, fg) pair innermost for every anchor.
    rpn_bbox_pred: (1, H, W, Ax4) box regression output of the RPN.
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratio].
    cfg_key: the ``cfg.ZLRM.TRAIN`` settings are used when this compares equal
        to ``True``, otherwise the ``cfg.ZLRM.TEST`` settings.
    _feat_stride: stride of the feature map relative to the input image.
    anchor_scales: scales forwarded to ``generate_anchors``.

    Returns
    -------
    Tuple ``(blob, scores)``: ``blob`` is a float32 array of rows
    [0, x1, y1, x2, y2]; ``scores`` holds the matching scores, one per row.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Equality (not truthiness) is kept so non-bool arguments behave as before.
    phase_cfg = cfg.ZLRM.TRAIN if cfg_key == True else cfg.ZLRM.TEST  # noqa: E712
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # Split the last axis into (anchor, bg/fg) and keep fg: (1, H, W, A).
    # NOTE: models trained with the older (1, H, W, 2, A) ordering would need
    # ``rpn_cls_prob_reshape[:, :, :, _num_anchors:]`` here instead.
    fg_prob = np.reshape(
        rpn_cls_prob_reshape,
        [1, height, width, _num_anchors, 2])[:, :, :, :, 1]
    scores = np.reshape(fg_prob, [1, height, width, _num_anchors])

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    xs, ys = np.meshgrid(np.arange(0, width) * _feat_stride,
                         np.arange(0, height) * _feat_stride)
    shifts = np.vstack(
        (xs.ravel(), ys.ravel(), xs.ravel(), ys.ravel())).transpose()

    # (1, A, 4) anchors + (K, 1, 4) cell shifts -> (K * A, 4).
    A = _num_anchors
    K = shifts.shape[0]
    anchors = (_anchors.reshape((1, A, 4))
               + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    anchors = anchors.reshape((K * A, 4))

    # Inputs are already channels-last, so a plain reshape yields rows ordered
    # by (h, w, a), matching the anchors.
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove boxes below min_size, scaled by im_info[2].
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4./5. Sort by descending score and keep the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6./7. NMS, then keep the top post_nms_topN.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Single-image batches only, so every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                   _feat_stride=[cfg["ANCHOR_WIDTH"], ]):
    """Turn RPN outputs into scored, NMS-filtered proposal boxes.

    Parameters
    ----------
    rpn_cls_prob_reshape : ndarray
        Background/foreground probabilities from the RPN for a single image
        (the first axis must have length 1).  The code reshapes it to
        (1, H, W, A, 2) with W = shape[2] // A and keeps index 1 of the last
        axis as the foreground score, so it is presumably laid out as
        (1, H, W * A, 2) -- confirm against the caller.
    rpn_bbox_pred : ndarray
        Box regression output of the RPN.  It is flattened to rows of 4
        values that must line up with the (h, w, a)-ordered anchors.
    im_info : sequence
        Only ``im_info[0]`` is used; its first two entries are passed to
        ``clip_boxes`` as the image bounds (presumably
        [image_height, image_width, scale] -- confirm against the caller).
    _feat_stride : sequence or scalar
        Multiplier applied to feature-map cell indices to obtain the anchor
        shifts in image coordinates.  Defaults to ``[cfg["ANCHOR_WIDTH"]]``.

    Returns
    -------
    blob : ndarray, float32
        One row per kept proposal: the score in column 0 followed by the
        proposal coordinates (presumably [score, x1, y1, x2, y2]).
    bbox_deltas : ndarray
        The raw regression rows belonging to the kept proposals, in the same
        order as ``blob``.

    Algorithm
    ---------
    1. generate A anchors at every (H, W) feature-map location
    2. apply the predicted deltas to the anchors and clip to the image
    3. drop boxes rejected by ``_filter_boxes`` and boxes with score <= 0
    4. sort by score (descending) and keep the top ``RPN_PRE_NMS_TOP_N``
    5. run NMS with ``RPN_NMS_THRESH`` and keep the top
       ``RPN_POST_NMS_TOP_N``
    """
    _anchors = generate_anchors()  # base anchors, one row of 4 coords each
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Test-time RPN settings.
    pre_nms_topN = cfg["TEST"]["RPN_PRE_NMS_TOP_N"]  # max boxes fed to NMS
    post_nms_topN = cfg["TEST"]["RPN_POST_NMS_TOP_N"]  # max boxes after NMS
    nms_thresh = cfg["TEST"]["RPN_NMS_THRESH"]
    min_size = cfg["TEST"]["RPN_MIN_SIZE"]  # threshold for _filter_boxes

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # Axis 2 carries the anchors folded into the width; divide by the actual
    # anchor count (rather than a hard-coded 10) so the reshape below stays
    # consistent with whatever generate_anchors() returns.
    width = width // _num_anchors

    # Keep only the foreground probability (index 1 of the last axis); the
    # background probability is not needed.  Result: (1, H, W, A).
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    # The predictions are relative offsets; they are converted to image
    # coordinates further down.
    bbox_deltas = rpn_bbox_pred

    # Enumerate all shifts: one (x, y, x, y) offset per feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (height, width)
    # shifts has shape (height * width, 4)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]  # height * width
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # every anchor over the whole image

    # Flatten the predicted deltas and the scores so that their rows are in
    # the same (h, w, a) order as the anchors.
    bbox_deltas = bbox_deltas.reshape((-1, 4))  # (H * W * A, 4)
    scores = scores.reshape((-1, 1))  # (H * W * A, 1)

    # TODO: needs to be revised if only 2 values are regressed
    # 1. convert anchors into proposals via the bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # TODO: needs to be revised if only 2 values are regressed
    # 2. clip predicted boxes to the image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes that _filter_boxes rejects for being too
    #    small.  NOTE(review): min_size is passed as-is; it is NOT rescaled
    #    by the image scale stored in im_info[2] -- confirm this is intended.
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]
    print('proposals1', proposals.shape)

    # Drop rows whose foreground score is not strictly positive.
    score_filter = np.where(scores > 0.0)[0]
    proposals = proposals[score_filter, :]
    scores = scores[score_filter]
    bbox_deltas = bbox_deltas[score_filter, :]
    print('proposals2', proposals.shape)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]
    print('proposals3', proposals.shape)

    s = time.time()
    # 6. apply nms
    # 7. take post_nms_topN
    # 8. return the top proposals
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    print(time.time() - s)  # elapsed NMS time in seconds
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # Output blob: the score goes in the first column (not a batch index),
    # followed by the proposal coordinates.
    blob = np.hstack(
        (scores.astype(np.float32, copy=False),
         proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas