def select_bg(self, Phi_labels, boxes, labels, bbox_pred, keeps_Y, good_gt_overlap, M, im_shape_w, im_shape_h):
    """Find B in p(B|Xb).

    Walks the ``M`` candidate indices and drops ("ignores") every index that
    either matches an already kept item / good ground-truth overlap, or whose
    class-specific predicted box overlaps a kept box of the same label by
    more than ``cfg.TRAIN.IGNORANCE``.

    Args:
        Phi_labels: per-index labels, compared with ``keeps_Y`` values and
            with ``labels``.
        boxes: array whose rows are decoded with ``bbox_transform_inv``.
        labels: per-index labels; ``labels[k]`` picks the 4-column slice of
            ``bbox_pred`` used for index ``k``.
        bbox_pred: array of box deltas, 4 columns per label.
        keeps_Y: mapping ``index -> label`` of kept items.
        good_gt_overlap: container of indices (only membership is tested).
        M: number of candidates.
        im_shape_w, im_shape_h: passed to ``clip_boxes`` as a 2-tuple.

    Returns:
        ``(selected_item, prob_dpp)``: a numpy array with the indices that
        were not ignored, and a list holding one ``1.0`` per such index.
    """
    # Group kept indices by label: label -> [indices].
    # ``items()`` works on Python 2 and 3 (``iteritems`` is Python 2 only).
    kept_by_label = {}
    for idx, lbl in keeps_Y.items():
        kept_by_label.setdefault(lbl, []).append(idx)

    # A set keeps the membership tests below O(1).
    ignores = set()
    for k in range(M):
        if (k in keeps_Y and keeps_Y[k] == Phi_labels[k]) \
                or (k in good_gt_overlap and Phi_labels[k] == labels[k] and labels[k] > 0):
            ignores.add(k)
            continue

        label_k = labels[k]
        if label_k not in kept_by_label:
            continue

        kept = kept_by_label[label_k]
        cols = slice(4 * label_k, 4 * (label_k + 1))

        # Predicted box of candidate k, repeated once per kept box so the
        # two arrays passed to IoU_target have the same number of rows.
        pbox = bbox_transform_inv(boxes[[k], :], bbox_pred[[k], cols])
        pbox = clip_boxes(pbox, (im_shape_w, im_shape_h))
        pbox = np.reshape(np.tile(pbox, len(kept)), (len(kept), 4))

        kept_pbox = bbox_transform_inv(boxes[kept, :], bbox_pred[kept, cols])
        kept_pbox = clip_boxes(kept_pbox, (im_shape_w, im_shape_h))

        if np.max(IoU_target(pbox, kept_pbox)) > cfg.TRAIN.IGNORANCE:
            ignores.add(k)

    selected_item = np.array([k for k in range(M) if k not in ignores])
    prob_dpp = [p for k, p in enumerate(np.ones((M,))) if k not in ignores]
    return selected_item, prob_dpp
def im_detect(sess, net, im):
    """Run detection at several test scales and stack the results.

    For each scale in ``[576, 688, 800]`` the image is converted to a blob,
    passed through ``net.test_image`` and the returned RoIs are divided by
    the image scale.  Only the first two score columns and the first eight
    box columns of every scale are kept.

    Returns:
        ``(scores, boxes)``: per-scale results concatenated along axis 0.
    """
    scores_per_scale = []
    boxes_per_scale = []
    # Other candidate scales noted by the author:
    # [[480, 800], [576, 900], [688, 1100], [800, 1200], [1200, 1600], [1400, 2000]]
    for scale in [576, 688, 800]:
        blobs, im_scales = _get_blobs(im, scale)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_scale = im_scales[0]

        data = blobs['data']
        blobs['im_info'] = np.array(
            [data.shape[1], data.shape[2], im_scale], dtype=np.float32)

        outputs = net.test_image(sess, blobs['data'], blobs['im_info'])
        _, scores, bbox_pred, rois, _, scores_hm, bbox_pred_hm = outputs

        boxes = rois[:, 1:5] / im_scale
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
            # Boxes from the second ("hm") head; not used by the active
            # fusion below, only by the alternatives listed further down.
            pred_boxes_hm = _clip_boxes(bbox_transform_inv(boxes, bbox_pred_hm), im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        scores_per_scale.append(scores[:, :2])
        boxes_per_scale.append(pred_boxes[:, :8])
        # Alternative ways of fusing with the "hm" head that the author
        # experimented with (all disabled):
        #   1. extra boxes with different scores (scores_hm)
        #   2. arithmetic mean of scores[:, :2] and scores_hm
        #   3. geometric mean of scores[:, :2] and scores_hm
        #   4. harmonic mean of scores[:, :2] and scores_hm

    return np.concatenate(scores_per_scale), np.concatenate(boxes_per_scale)
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, rpn_reject_inds):
    """Select the top region proposals without non-maximal suppression.

    Anchors listed in ``rpn_reject_inds`` are removed first; the remaining
    ones are ranked by score and ``cfg.TEST.RPN_TOP_N`` of them are decoded
    into boxes.  For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D array; channels from ``num_anchors`` onward are
            used as scores.
        rpn_bbox_pred: box deltas, reshaped to ``(-1, 4)``.
        im_info: indexable whose first element supplies the clip size
            (its first two entries).
        _feat_stride: unused here.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset of the scores in ``rpn_cls_prob``.
        rpn_reject_inds: array of anchor indices to discard (may be empty).

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed
        by the clipped proposals; ``scores`` are the matching scores.
    """
    top_n = cfg.TEST.RPN_TOP_N
    im_info = im_info[0]

    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))

    # ---- reject via RPN ----
    if rpn_reject_inds.size != 0:
        # Tag rejected anchors with a sentinel score, then keep the rest.
        scores[np.unique(rpn_reject_inds)] = -2
        survivors = np.where(scores != -2)[0]
        anchors = anchors[survivors]
        scores = scores[survivors]
        deltas = deltas[survivors]
    # ---- reject done ----

    count = scores.shape[0]
    if count < top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(count, size=top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1][:top_n].reshape(top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    deltas = deltas[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals and clip them to the image
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def im_detect(sess, net, im):
    """Run the test network on one image.

    Returns:
        ``(scores, pred_boxes)``: class scores reshaped to 2-D, and the
        per-class boxes in the coordinates of ``im``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    data = blobs['data']
    blobs['im_info'] = np.array(
        [data.shape[1], data.shape[2], im_scale], dtype=np.float32)

    # Run the test network
    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # boxes: regions returned by the network, mapped back by the image scale
    boxes = rois[:, 1:5] / im_scale
    # scores: softmax output of the classification head (per the original note)
    scores = np.reshape(scores, [scores.shape[0], -1])
    # bbox_pred: box deltas predicted by the regression head
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Map deltas back to coordinates: [dx, dy, dw, dh] -> [xmin, ymin, xmax, ymax]
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def im_detect(net, blobs):
    """Run detection on an already prepared blob dictionary.

    Return
    - scores     : (num_rois, num_classes)
    - pred_boxes : (num_rois, num_classes * 4) [xyxy] in original image size
    - net_conv   : extra output of ``net.test_image``
                   (author's note: Variable cuda (1, 1024, H, W))
    - im_scale   : ``blobs['im_info'][0][2]``
    """
    _, scores, bbox_pred, rois, net_conv = net.test_image(blobs)
    # Author's observed shapes: scores (300, 81), bbox_pred (300, 324), rois (300, 5)

    im_info = blobs['im_info'][0]
    boxes = rois[:, 1:5] / im_info[2]  # (n, 4)
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])  # (n, C*4)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        decoded = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(bbox_pred))
        pred_boxes = decoded.numpy()
        # Size used for clipping: the first two im_info entries divided by
        # the scale, with a fixed third entry of 3.
        im_shape = (round(blobs['im_info'][0][0] / blobs['im_info'][0][2]),
                    round(blobs['im_info'][0][1] / blobs['im_info'][0][2]),
                    3)
        pred_boxes = _clip_boxes(pred_boxes, im_shape)  # (n, C*4)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, net_conv, blobs['im_info'][0][2]
def im_detect(sess, net, im):
    """Detect objects in a single image.

    Returns:
        ``(scores, pred_boxes)`` as 2-D arrays; boxes are expressed in the
        coordinates of ``im``.
    """
    # Convert the image into the blob the network expects
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    net_input = blobs['data']
    im_info = np.array(
        [net_input.shape[1], net_input.shape[2], im_scales[0]],
        dtype=np.float32)
    blobs['im_info'] = im_info
    # Author's example: blobs['data'] has shape (1, 600, 800, 3) and
    # blobs['im_info'] has shape (3,), e.g. [600. 800. 1.6]

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # Undo the input scaling so the RoIs line up with the original image
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    return scores, pred_boxes
def im_detect(sess, net, im):
    """Detect objects in a single image (2-D ``im_info`` variant).

    Unlike sibling implementations, ``scores`` and ``bbox_pred`` are used
    exactly as returned by ``net.test_image`` (no reshape).

    Returns:
        ``(scores, pred_boxes)``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    blob_shape = blobs['data'].shape
    # One row holding blob dims 1 and 2 plus the image scale.
    blobs['im_info'] = np.array(
        [[blob_shape[1], blob_shape[2], im_scale]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])
    boxes = rois[:, 1:5] / im_scale

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def im_detect(sess, net, im):
    """Run the test network and return scores plus box corner coordinates.

    Args:
        sess: session passed to ``net.test_image``.
        net: network object exposing ``test_image``.
        im: image array; ``im.shape`` is used for clipping.

    Returns:
        ``(scores, pred_boxes)``: class scores reshaped to 2-D and the
        per-class boxes in the coordinates of ``im``.
    """
    # Scale the image and convert it to the blob the network expects.
    # This call must be active: everything below reads ``blobs`` and
    # ``im_scales``.
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)

    # Run the test network.
    # Author's note: scores has shape (300, 21) - 20 classes plus background.
    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # Columns 1..4 of each RoI, restored from the scaled size to the
    # original size.
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas to get the box corners,
        # then clip the boxes to the image.
        pred_boxes = bbox_transform_inv(boxes, bbox_pred)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
def im_detect_caption(sess, net, im):
    """Detect objects in one image and return the generated sentences too.

    Returns:
        ``(scores, pred_boxes, generated_sentences)`` where the last item
        is passed through unchanged from ``net.test_image_caption``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    data = blobs['data']
    blobs['im_info'] = np.array([data.shape[1], data.shape[2], im_scale], dtype=np.float32)

    # Captioning variant of test_image
    outputs = net.test_image_caption(sess, blobs['data'], blobs['im_info'])
    _, scores, bbox_pred, generated_sentences, rois = outputs
    # (A cfg.DEBUG_VERBOSE dump of generated_sentences used to live here;
    # the author noted the values were all zeros at the time.)

    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, generated_sentences
def freeze_graph_test(sess, blobs):
    """Run detection through the graph already loaded in ``sess``.

    Input and output tensors are looked up by their hard-coded names, the
    graph is executed on ``blobs['data']`` / ``blobs['im_info']`` and the
    results are decoded into boxes.

    :param sess: session whose ``graph`` contains the tensors named below
    :param blobs: mapping with ``'data'`` and ``'im_info'``;
        ``blobs['im_info'][2]`` is used as the image scale
    :return: ``(scores, pred_boxes)``
    """
    graph = sess.graph
    # Input tensors of the network
    input_image_tensor = graph.get_tensor_by_name("Placeholder:0")
    tensor_info = graph.get_tensor_by_name("Placeholder_1:0")
    # Output tensors of the network
    biasadd = graph.get_tensor_by_name("vgg_16_3/cls_score/BiasAdd:0")
    score = graph.get_tensor_by_name("vgg_16_3/cls_prob:0")
    bbox = graph.get_tensor_by_name("add:0")
    rois_tensor = graph.get_tensor_by_name("vgg_16_1/rois/concat:0")

    feed = {
        input_image_tensor: blobs['data'],
        tensor_info: blobs['im_info'],
    }
    _, scores, bbox_pred, rois = sess.run([biasadd, score, bbox, rois_tensor], feed_dict=feed)

    im_scale = blobs['im_info'][2]
    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    # Apply bounding-box regression deltas
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    # NOTE(review): the clip shape is hard-coded to (255, 255, 0) rather
    # than derived from the input - confirm this is intended.
    pred_boxes = _clip_boxes(pred_boxes, (255, 255, 0))
    return scores, pred_boxes
def update_rl(rl_in, h_start, w_start, h_end, w_end, t, rois_seq, cls_probs_seq, bbox_preds_seq, cls_probs_uptonow, pred_bboxes_uptonow, keeps, im_shape, bin_ctrs, height, width, rl_in_upsamp_height, rl_in_upsamp_width, thresh=0.0):
    """Update the RL state after one time step.

    Steps, in order:
      1. for ``t > 1``, keep only the previous survivors (``keeps[0]``) of
         the accumulated predictions and reset ``keeps[0]``;
      2. if ``rois_seq`` is given, append the new predictions and refresh
         ``keeps`` via ``_get_nms_keep``;
      3. set the non-history channels of the window
         ``[h_start:h_end, w_start:w_end]`` of ``rl_in`` to -1 (in place);
      4. if ``rois_seq`` is given, run ``do_hist_update``.

    Returns:
        ``(rl_in, keeps, cls_probs_uptonow, pred_bboxes_uptonow)``.
    """
    has_new_rois = rois_seq is not None

    if t > 1:
        survivors = keeps[0]
        cls_probs_uptonow = cls_probs_uptonow[survivors, :]
        pred_bboxes_uptonow = pred_bboxes_uptonow[keeps[0], :]
        keeps[0] = []

    # Potentially perform per-time-step NMS
    if has_new_rois:
        # Current preds at this fix merged with survivors from previous steps
        cls_probs_uptonow = np.vstack([cls_probs_uptonow, cls_probs_seq])
        new_bboxes = clip_boxes(bbox_transform_inv(rois_seq, bbox_preds_seq), im_shape)
        pred_bboxes_uptonow = np.vstack([pred_bboxes_uptonow, new_bboxes])

        # Perform on-the-fly NMS (used when performing class-specific history updates)
        keeps = _get_nms_keep(keeps, cls_probs_uptonow, pred_bboxes_uptonow, thresh)

    # Update non-history part of RL state
    rl_in[:, h_start:h_end, w_start:w_end, :cfg.DIMS_NONHIST] = -1

    if has_new_rois:
        rl_in, _ = do_hist_update(rl_in, cls_probs_uptonow, pred_bboxes_uptonow,
                                  keeps, bin_ctrs, height, width,
                                  rl_in_upsamp_height, rl_in_upsamp_width)

    return rl_in, keeps, cls_probs_uptonow, pred_bboxes_uptonow
def spatial_fc7_to_prediction(self, spatial_fc7, im_info, ori_boxes):
    """Classify and regress boxes from fc7 features (testing helper).

    Only used for testing; the original author marked the related
    ``box_to_fc7`` check as passed.

    Args:
        spatial_fc7: features fed to ``self.net._region_classification``.
        im_info: ``im_info[0][2]`` is the image scale and ``im_info[0][:2]``
            the clip size.
        ori_boxes: numpy boxes, one row per feature.

    Returns:
        ``(scores, pred_boxes)`` as numpy arrays.
    """
    cls_prob, bbox_pred = self.net._region_classification(spatial_fc7)
    im_scale = im_info[0][2]

    # Build rois: a zero batch-index column followed by the scaled boxes
    batch_inds = Variable(spatial_fc7.data.new(ori_boxes.shape[0], 1).zero_())
    scaled_np = (ori_boxes * im_scale).astype(np.float32)
    scaled_boxes = Variable(torch.from_numpy(scaled_np).cuda())
    rois = torch.cat([batch_inds, scaled_boxes], 1)

    # Undo target normalization on bbox_pred when it was applied in training
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        raw = bbox_pred.data
        stds = raw.new(cfg.TRAIN.BBOX_NORMALIZE_STDS).repeat(self.num_classes)
        stds = stds.unsqueeze(0).expand_as(bbox_pred)
        means = raw.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS).repeat(self.num_classes)
        means = means.unsqueeze(0).expand_as(bbox_pred)
        bbox_pred = bbox_pred.mul(Variable(stds)).add(Variable(means))

    # Convert to numpy
    scores = cls_prob.data.cpu().numpy()
    rois = rois.data.cpu().numpy()
    bbox_pred = bbox_pred.data.cpu().numpy()

    # Regress boxes
    boxes = rois[:, 1:5] / im_scale
    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas
    decoded = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(bbox_pred))
    pred_boxes = self._clip_boxes(decoded.numpy(), im_info[0][:2])
    return scores, pred_boxes
def im_detect(net, im):
    """Detect objects in a single image (PyTorch box decoding).

    Returns:
        ``(scores, pred_boxes)`` as 2-D numpy arrays; boxes are in the
        coordinates of ``im``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    data = blobs['data']
    blobs['im_info'] = np.array(
        [data.shape[1], data.shape[2], im_scale], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(blobs['data'], blobs['im_info'])

    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas; the decoder works on tensors
    decoded = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(bbox_pred))
    pred_boxes = _clip_boxes(decoded.numpy(), im.shape)
    return scores, pred_boxes
def im_detect(net, im):
    """Detect objects in a single image and expose the extra net output.

    Return
    - scores     : (num_rois, num_classes)
    - pred_boxes : (num_rois, num_classes * 4) [xyxy] in original image size
    - net_conv   : extra output of ``net.test_image``
                   (author's note: Variable cuda (1, 1024, H, W))
    - im_scale   : float
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    blob_shape = blobs['data'].shape
    blobs['im_info'] = np.array([[blob_shape[1], blob_shape[2], im_scale]], dtype=np.float32)

    _, scores, bbox_pred, rois, net_conv = net.test_image(blobs['data'], blobs['im_info'])

    boxes = rois[:, 1:5] / im_scale  # (n, 4)
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])  # (n, C*4)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        decoded = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(bbox_pred))
        pred_boxes = _clip_boxes(decoded.numpy(), im.shape)  # (n, C*4)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes, net_conv, im_scale
def im_detect(net, im):
    """Run ``net`` on one image and decode its box predictions.

    Returns:
        ``(scores, pred_boxes)``: 2-D numpy arrays; boxes are expressed in
        the coordinates of ``im``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    net_input = blobs['data']
    im_info = np.array(
        [net_input.shape[1], net_input.shape[2], im_scales[0]],
        dtype=np.float32)
    blobs['im_info'] = im_info

    _, scores, bbox_pred, rois = net.test_image(blobs['data'], blobs['im_info'])

    # Undo the input scaling of the RoIs
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        boxes_t = torch.from_numpy(boxes)
        deltas_t = torch.from_numpy(bbox_pred)
        pred_boxes = bbox_transform_inv(boxes_t, deltas_t).numpy()
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
def im_detect(self, im):
    """Detect objects with the split RPN / RCNN sessions.

    The RPN session produces proposals and a feature map; the RCNN session
    then classifies and regresses those proposals.  Box regression is
    always applied here.

    Returns:
        ``(scores, pred_boxes)`` as 2-D arrays.
    """
    blobs, im_scales = model_utils._get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    blob_shape = blobs['data'].shape
    blobs['im_info'] = np.array([[blob_shape[1], blob_shape[2], im_scale]], dtype=np.float32)

    anchors, anchor_length = self.generate_anchors(blobs['im_info'])

    # Stage 1: region proposal network
    rpn_fetches = [self._rpn_cls_prob, self._rpn_bbox_pred, self._rpn_feature_map]
    rpn_cls_prob, rpn_bbox_pred, rpn_feature_map = self.rpn_sess.run(
        rpn_fetches, {self._images: blobs['data']})

    rois, _ = proposal_layer(rpn_cls_prob, rpn_bbox_pred, blobs['im_info'], 'TEST',
                             self._feat_stride, anchors, self._num_anchors)
    rois = np.reshape(rois, [-1, 5])

    # Stage 2: classification / regression on the proposals
    rcnn_feed = {self._rcnn_feature_map: rpn_feature_map, self._rois: rois}
    scores, bbox_pred = self.rcnn_sess.run([self._cls_prob, self._bbox_pred], rcnn_feed)

    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    # Apply bounding-box regression deltas
    pred_boxes = self._clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
    return scores, pred_boxes
def im_detect(sess, net, im):
    """Detect objects in a single image (2-D ``im_info`` variant).

    Returns:
        ``(scores, pred_boxes)`` as 2-D arrays; boxes are in the
        coordinates of ``im``.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    data = blobs['data']
    # One row: blob dims 1 and 2, followed by the image scale.
    blobs['im_info'] = np.array(
        [[data.shape[1], data.shape[2], im_scale]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # boxes: regions returned by the network, mapped back by the image scale
    boxes = rois[:, 1:5] / im_scale
    # scores: softmax output of the classification head (per the original note)
    scores = np.reshape(scores, [scores.shape[0], -1])
    # bbox_pred: box deltas predicted by the regression head
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas:
    # [dx, dy, dw, dh] -> [xmin, ymin, xmax, ymax]
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def predict(self, img_path):
    """Run detection on the image stored at ``img_path``.

    Args:
        img_path: path handed to ``cv2.imread``.

    Returns:
        ``(scores, pred_boxes)`` as numpy arrays.  Author's note: scores /
        probs are (num_rois, 81) and pred_boxes (num_rois, 81*4).

    Raises:
        IOError: if ``cv2.imread`` cannot load the image.
    """
    im = cv2.imread(img_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than deep inside blob creation.
        raise IOError('Could not read image: {}'.format(img_path))

    blobs, im_scales = self._get_blobs(im)
    im_blob = blobs['data']  # author's note: (1, iH, iW, 3)
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    # test_image returns cls_score, cls_prob, bbox_pred, rois, net_conv.
    # Author's observed shapes: scores (300, 81), bbox_pred (300, 324),
    # rois (300, 5), net_conv (1, 1024, 38, 57).  net_conv is not used here.
    _, scores, bbox_pred, rois, net_conv = self.net.test_image(
        blobs['data'], blobs['im_info'])

    # Four coordinate columns per RoI, mapped back by the image scale
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = bbox_transform_inv(
            torch.from_numpy(boxes), torch.from_numpy(bbox_pred)).numpy()
        pred_boxes = self._clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
def im_detect_fast(sess, net, im):
    """Single-scale detection (scale 688) with the multi-head network.

    The heat-map head outputs returned by ``net.test_image`` are unpacked
    but not used.

    Returns:
        ``(scores, pred_boxes)`` for the single scale.
    """
    # Other candidate scales noted by the author:
    # [[480, 800], [576, 900], [688, 1100], [800, 1200], [1200, 1600], [1400, 2000]]
    scale = 688

    blobs, im_scales = _get_blobs(im, scale)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_scale = im_scales[0]

    data = blobs['data']
    blobs['im_info'] = np.array(
        [data.shape[1], data.shape[2], im_scale], dtype=np.float32)

    outputs = net.test_image(sess, blobs['data'], blobs['im_info'])
    _, scores, bbox_pred, rois, cls_score_hm, cls_prob_hm, bbox_pred_hm = outputs

    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
def im_detect(sess, net, im):
    """Run detection at three test scales and stack the results.

    Returns:
        ``(scores, pred_boxes)``: results of scales 576, 688 and 800
        concatenated, in that order, along axis 0.
    """
    all_scores = []
    all_boxes = []
    # Other candidate scales noted by the author:
    # [[480, 800], [576, 900], [688, 1100], [800, 1200], [1200, 1600], [1400, 2000]]
    for scale in [576, 688, 800]:
        blobs, im_scales = _get_blobs(im, scale)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_scale = im_scales[0]

        data = blobs['data']
        blobs['im_info'] = np.array([data.shape[1], data.shape[2], im_scale], dtype=np.float32)

        _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

        boxes = rois[:, 1:5] / im_scale
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        all_scores.append(scores)
        all_boxes.append(pred_boxes)

    # Exactly three entries (one per scale) are stacked.
    return np.concatenate(all_scores), np.concatenate(all_boxes)
def proposal_mask_layer(rois, cls_prob, bbox_pred, im_info, num_classes, training, testing):
    """Turn per-class detections into a score-sorted batch for the mask head.

    Box deltas are de-normalized with ``cfg.TRAIN.BBOX_NORMALIZE_STDS`` /
    ``MEANS``, decoded and clipped.  For every class index from 1 up to
    ``num_classes - 1`` the boxes are paired with their scores (with NMS at
    0.7 in training mode), and the rows of all classes are merged and
    sorted by descending score.

    Args:
        rois: array whose columns 1..4 hold the box coordinates.
        cls_prob: class scores, reshaped to ``(num_rois, -1)``.
        bbox_pred: box deltas, reshaped to ``(num_rois, -1)``.  The caller's
            array is not modified.
        im_info: ``im_info[0][2]`` is the image scale and ``im_info[0][3:5]``
            the size given to ``_clip_boxes``.
        num_classes: number of classes; index 0 is skipped.
        training, testing: mode flags; ``(1, 0)`` selects training mode and
            ``(0, 1)`` testing mode.

    Returns:
        float32 array with rows
        ``[batch_ind, x1, y1, x2, y2, score, class_ind]`` limited to the
        mode's ``MASK_BATCH`` rows, or ``None`` when there are no
        detections.

    Raises:
        ValueError: if detections exist but the ``training``/``testing``
            pair is neither ``(1, 0)`` nor ``(0, 1)``.
    """
    image_info = im_info[0]
    im_scale = image_info[2]

    boxes = rois[:, 1:5] / im_scale
    scores = np.reshape(cls_prob, [cls_prob.shape[0], -1])
    deltas = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes))
    means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes))
    # Computed out of place: ``deltas`` can be a view of the caller's
    # ``bbox_pred`` and must not be modified.  The cast keeps the dtype the
    # former in-place update produced.
    deltas = (deltas * stds + means).astype(deltas.dtype, copy=False)

    pred_boxes = bbox_transform_inv(boxes, deltas)
    pred_boxes = _clip_boxes(pred_boxes, image_info[3:5])

    is_training = training == 1 and testing == 0
    is_testing = training == 0 and testing == 1

    rows = []
    for ind in range(1, num_classes):
        # Boxes go back to the scaled-input coordinates of the rois.
        cls_boxes = pred_boxes[:, 4 * ind:4 * (ind + 1)] * im_scale
        cls_scores = scores[:, ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        if is_training:
            dets = dets[nms(dets, 0.7), :]
        count = dets.shape[0]
        cls_ind = np.full((count), ind, dtype=np.float32)
        batch_inds = np.zeros((count), dtype=np.float32)
        dets = np.hstack(
            (batch_inds[:, np.newaxis], dets, cls_ind[:, np.newaxis]))
        rows.extend(dets.tolist())

    if not rows:
        return None

    if is_training:
        mask_batch = cfg.TRAIN.MASK_BATCH
    elif is_testing:
        mask_batch = cfg.TEST.MASK_BATCH
    else:
        raise ValueError(
            'training/testing must be (1, 0) or (0, 1), got ({}, {})'.format(
                training, testing))

    mask_data = np.array(rows, dtype=np.float32)
    # Column 5 holds the score; sort descending.
    mask_data = mask_data[np.argsort(mask_data[:, 5])[::-1]]
    if mask_data.shape[0] > mask_batch:
        mask_data = mask_data[0:mask_batch, :]
    return mask_data
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    '''
    A simplified version compared to fast/er RCNN
    For details please see the technical report

    :param rpn_cls_prob: 4-D array; channels from ``num_anchors`` on are the scores
    :param rpn_bbox_pred: box deltas, reshaped to ``(-1, 4)``
    :param im_info: [M, N, scale_factor] describing how the image was
        scaled to M x N; the first two entries bound the clipping
    :param cfg_key: config section name (``bytes`` is decoded as UTF-8)
    :param _feat_stride: author's note: feat_stride=16, used for anchor
        offsets; not used in this function
    :param anchors: anchor boxes, one row per score
    :param num_anchors: channel offset of the scores in ``rpn_cls_prob``
    :return: ``(blob, scores)`` - zero batch-index column plus proposals,
        and the matching scores
    '''
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    pre_nms_topN = section.RPN_PRE_NMS_TOP_N
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))
    # Decode the box corner coordinates and clip them
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Pick the top region proposals: rank by score in descending order and
    # keep the first pre_nms_topN (author's example: 6000)
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS: keep the first
    # post_nms_topN (author's example: 300) as the output
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors, reject_inds_1, reject_inds_2):
    """A simplified version compared to fast/er RCNN, with anchor rejection.

    Proposals whose index appears in ``reject_inds_1`` or ``reject_inds_2``
    are removed before the usual top-N / NMS / top-N pipeline.
    For details please see the technical report.

    Returns:
        ``(blob, scores)``: zero batch-index column plus proposals, and the
        matching scores.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    pre_nms_topN = section.RPN_PRE_NMS_TOP_N
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH
    im_info = im_info[0]

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # ---- reject process ----
    # Rejected entries get a sentinel score and are then filtered out.
    for rejected in (reject_inds_1, reject_inds_2):
        if rejected.size != 0:
            scores[np.unique(rejected)] = -2
    # reject via frcn and rpn
    survivors = np.where(scores != -2)[0]
    proposals = proposals[survivors]
    scores = scores[survivors]
    # ---- reject done ----

    # Pick the top region proposals
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
    For details please see the technical report

    ``cfg_key`` names the config section (author's note: TRAIN or TEST)
    that supplies:
      - RPN_PRE_NMS_TOP_N: number of top-scoring rois kept before NMS
      - RPN_POST_NMS_TOP_N: number of top-scoring rois kept after NMS
      - RPN_NMS_THRESH: NMS threshold (author's note: 0.7 for TRAIN)

    Returns:
        ``(blob, scores)``: zero batch-index column plus proposals, and the
        matching scores.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    settings = cfg[cfg_key]
    pre_nms_topN = settings.RPN_PRE_NMS_TOP_N
    post_nms_topN = settings.RPN_POST_NMS_TOP_N
    nms_thresh = settings.RPN_NMS_THRESH

    # Get the scores and bounding boxes.
    # Author's example: for rpn_cls_prob of shape (1, 38, 50, 18) the first
    # 9 channels are background scores and the last 9 foreground scores,
    # giving 17100 scores and 17100 delta rows.
    fg_scores = rpn_cls_prob[:, :, :, num_anchors:]
    deltas = rpn_bbox_pred.reshape((-1, 4))
    fg_scores = fg_scores.reshape((-1, 1))

    # Compute proposals from anchors and deltas, then clip the boxes
    proposals = bbox_transform_inv(anchors, deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals: flatten, argsort ascending, reverse
    # to get indices from highest to lowest score
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        # Keep only the first pre_nms_topN
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # Non-maximal suppression on rows of (box, score)
    keep = nms(np.hstack((proposals, fg_scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    fg_scores = fg_scores[keep]

    # Only support single image as input, so the batch index column is all zeros
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, fg_scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors, gt_boxes, gt_texts, gt_pair):
    """A simplified version compared to fast/er RCNN (PyTorch tensors).

    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D tensor; channels from ``num_anchors`` on are the
            scores.
        rpn_bbox_pred: box deltas, viewed as ``(-1, 4)``.
        im_info: its first two entries bound the clipping.
        cfg_key: config section name (``bytes`` is decoded as UTF-8).
        _feat_stride: unused here.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset of the scores in ``rpn_cls_prob``.
        gt_boxes, gt_texts, gt_pair: unused; they were only read by a
            disabled debugging snippet.

    Returns:
        ``(blob, scores)``: zero batch-index column plus proposals, and the
        matching scores as a column.
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always restore the column shape: torch.cat below needs a 2-D tensor
    # even when no pre-NMS limit is applied.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]
    # Till now, proposals and scores consist of the top RPN_PRE_NMS_TOP_N
    # anchor regions, ranked only by rpn_cls_prob.

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)
    return blob, scores
def unnormalize_box(Phi_labels, bbox_targets, boxes, stds, means, M, im_shape_w, im_shape_h):
    """Un-normalize the regression targets of the selected classes and decode them.

    For each of the ``M`` rows, the four target values belonging to that row's
    label are gathered from ``bbox_targets`` (columns ``4*label`` to
    ``4*label + 3``), rescaled with the label's row of ``stds`` and ``means``,
    applied to ``boxes`` via ``bbox_transform_inv`` and clipped with
    ``clip_boxes``.

    Args:
        Phi_labels: 1-D array of M integer class labels.
        bbox_targets: 2-D array holding 4 target values per class per row.
        boxes: boxes the targets are relative to.
        stds, means: per-class normalization statistics, indexed by label.
        M: number of rows to process.
        im_shape_w, im_shape_h: passed to ``clip_boxes`` as a tuple, in this order.

    Returns:
        The decoded, clipped boxes.
    """
    # Index with the integer labels themselves. The previous ``4*labels / 4``
    # round trip yields floats under true division, and NumPy rejects float
    # index arrays.
    labels = np.asarray(Phi_labels, dtype=np.intp)

    # Row i picks columns 4*labels[i] + (0, 1, 2, 3), giving an (M, 4) array.
    rows = np.arange(M)[:, np.newaxis]
    cols = 4 * labels[:, np.newaxis] + np.arange(4)
    bbox_target = bbox_targets[rows, cols]

    bbox_target = bbox_target * stds[labels, :] + means[labels, :]
    unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target)
    unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets, (im_shape_w, im_shape_h))
    return unnormalized_bbox_targets
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Multi-image proposal layer: rank, NMS and re-rank proposals per image.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Each image along the first axis of ``im_info`` is processed on its own
    and the per-image results are concatenated.

    Returns:
        (blob, scores): ``blob`` rows are a batch-index column (the image
        index) followed by the proposal coordinates; ``scores`` is the
        matching column of scores. Scores of boxes that survived NMS carry
        the +2 re-ranking offset applied below.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    pre_nms_topN = section.RPN_PRE_NMS_TOP_N
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH

    blob_parts = []
    score_parts = []
    for im_i in np.arange(im_info.shape[0]):
        # Work on copies so the in-place score bump below cannot touch the inputs.
        scores_im_i = rpn_cls_prob[im_i, :, :, num_anchors:].copy().reshape((-1, 1))
        deltas_im_i = rpn_bbox_pred[im_i].copy().reshape((-1, 4))
        proposals = clip_boxes(
            bbox_transform_inv(anchors, deltas_im_i), im_info[im_i, :2])

        # Highest score first, optionally truncated before NMS.
        ranking = scores_im_i.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            ranking = ranking[:pre_nms_topN]
        proposals = proposals[ranking, :]
        scores_im_i = scores_im_i[ranking]

        # Non-maximal suppression
        keep = nms(np.hstack((proposals, scores_im_i)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
            # Do not drop the suppressed boxes: bump the survivors by 2 so
            # they rank first, re-sort everything, then cut to post_nms_topN.
            scores_im_i[keep] += 2
            keep = scores_im_i.ravel().argsort()[::-1][:post_nms_topN]
        proposals = proposals[keep, :]
        scores_im_i = scores_im_i[keep]
        score_parts.append(scores_im_i)

        # Tag every row with the index of the image it came from.
        batch_inds = im_i * np.ones((proposals.shape[0], 1), dtype=np.float32)
        blob_parts.append(
            np.hstack((batch_inds, proposals.astype(np.float32, copy=False))))

    scores = np.concatenate(score_parts)
    blob = np.concatenate(blob_parts)
    return blob, scores
def im_detect(self, sess, net, image):
    """Run the network on one image and return per-class, NMS-filtered detections.

    :param sess: TensorFlow session
    :param net: network object providing ``test_image(sess, data, im_info)``
    :param image: input image (described as a PIL Image by the original
        author); it is converted with ``np.array`` to obtain its shape
    :return: float32 array with one row per detection laid out as
        ``[x1, y1, x2, y2, score, class_index]``; shape ``(0, 6)`` when there
        is no foreground class to evaluate
    """
    blobs, im_scales = self._get_blobs(image)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # Column 0 of rois is skipped; columns 1..4 are scaled back to the
    # original image size.
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = FasterRCNN._clip_boxes(pred_boxes, np.array(image).shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    detections = []
    for j, cls in enumerate(self.imdb.classes):
        if j == 0:
            # Index 0 is skipped (conventionally the background class).
            continue

        inds = np.where(scores[:, j] > self.thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        # Append the class index as a sixth column.
        cls_dets = np.insert(cls_dets, 5, j, axis=1)
        # NOTE(review): the score column is compared against cfg.TEST.NMS,
        # the same value used as the NMS threshold above. Confirm this is the
        # intended score cut-off rather than self.thresh.
        threshold_filter = np.where(cls_dets[:, 4] >= cfg.TEST.NMS)[0]
        detections.append(cls_dets[threshold_filter, :])

    if not detections:
        # np.vstack raises on an empty list; return an empty result instead.
        return np.zeros((0, 6), dtype=np.float32)
    return np.vstack(detections)
def inference_faster(im_file):
    """Run a frozen Faster R-CNN graph on one image file.

    Loads the graph from ``pb_file_path``, feeds the image blob, the image
    info and a random filler for the third placeholder, then decodes the
    predicted boxes.

    Args:
        im_file: path of the image to read with ``cv2.imread``.

    Returns:
        (scores, pred_boxes): the reshaped class scores and the decoded boxes
        clipped to the image.

    Raises:
        IOError: if the image cannot be read.
    """
    # Change the image to a blob.
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(im_file))
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)

    # The context manager closes the session (and frees its resources) even
    # if anything below raises.
    with tf.Session() as sess:
        with gfile.FastGFile(pb_file_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        image = sess.graph.get_tensor_by_name('Placeholder:0')
        image_info = sess.graph.get_tensor_by_name('Placeholder_1:0')
        gt = sess.graph.get_tensor_by_name("Placeholder_2:0")
        score = sess.graph.get_tensor_by_name(
            'SCORE/vgg_16_3/cls_prob/cls_prob/scores:0')
        bbox = sess.graph.get_tensor_by_name(
            'SCORE/vgg_16_3/bbox_pred/BiasAdd/bbox_pred/scores:0')

        rand_array = np.random.rand(1024, 5)
        print("---------------------------------------------")
        print(blobs["data"].dtype)
        # Feed values must be plain arrays; a tf.Tensor is rejected by
        # Session.run, so the filler is fed as a float32 ndarray.
        gt_filler = rand_array.astype(np.float32)

        # NOTE(review): only two tensors are fetched but four values are
        # unpacked, so this line still raises ValueError. The tensor names
        # for the first output and for rois are not visible here; add them to
        # the fetch list to make this work.
        _, scores, bbox_pred, rois = sess.run([score, bbox],
                                              feed_dict={
                                                  image: blobs['data'],
                                                  image_info: blobs['im_info'],
                                                  gt: gt_filler
                                              })

    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    # Apply bounding-box regression deltas
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into a ranked, NMS-filtered set of proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Parameters (shapes as noted by the original author):
    - rpn_cls_prob : (1, H, W, 2A) float Variable
    - rpn_bbox_pred: (1, H, W, 4A)
    - im_info      : indexable whose first element is
                     [im_height, im_width, scale]; only element 0 is used
    - cfg_key      : config section name, str or bytes
    - _feat_stride : unused
    - anchors      : (HWA, 4) float Variable
    - num_anchors  : anchors per location (A)
    Returns:
    - blob         : (N_nms, 5), a zero batch index followed by the box
    - scores       : (N_nms, 1)
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]  # positive scores only
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally: the sort above flattens the
    # scores, and the ``scores[:, 0]`` indexing below needs a 2-D tensor even
    # when pre_nms_topN disables the truncation.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression; this nms takes boxes and 1-D scores separately.
    keep = nms(proposals, scores[:, 0], nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, anchors, num_anchors):
    """Convert RPN outputs into ranked, NMS-filtered foreground (text) proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    :param rpn_cls_prob: (1, H, W, Ax2) softmax result of rpn scores
    :param rpn_bbox_pred: (1, H, W, Ax4) 1x1 conv result for rpn bbox
    :return: (blob, scores) where blob is the score column followed by the
        proposal coordinates (both as float32) and scores is the score column
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    pre_nms_topN = section.RPN_PRE_NMS_TOP_N
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH

    # The last dimension interleaves two values per anchor; index 1 of each
    # pair is taken as the foreground score. The original note ties this
    # layout to the reshape done in network.py.
    height, width = rpn_cls_prob.shape[1:3]  # height and width of the feature map
    per_anchor = np.reshape(rpn_cls_prob, [1, height, width, num_anchors, 2])
    scores = np.reshape(per_anchor[:, :, :, :, 1], [1, height, width, num_anchors])
    scores = scores.reshape((-1, 1))

    deltas = rpn_bbox_pred.reshape((-1, 4))
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Highest score first, optionally truncated before NMS.
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh, not cfg.USE_GPU_NMS)

    # Keep the top proposals after NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only a single input image is supported; the leading blob column carries
    # the score instead of a batch index.
    blob = np.hstack((scores.astype(np.float32, copy=False),
                      proposals.astype(np.float32, copy=False)))
    return blob, scores
def im_detect(net, im, label=None):
    """Run the network on one image and return class scores and boxes.

    Args:
        net: network object providing ``test_image(data, im_info)``.
        im: input image array; its ``shape`` is used to clip the boxes.
        label: unused.

    Returns:
        ``(scores, pred_boxes)``, or ``(scores, pred_boxes, mask_score_map)``
        when ``cfg.DO_PARSING`` is set. Per the original notes, for R rois
        ``scores`` is (R, num_classes) softmax probabilities and
        ``pred_boxes`` is (R, num_classes * 4).
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)

    # scores gives, for each roi, the probability of every class (after
    # softmax). bbox_pred gives, for each roi, one coordinate offset per
    # class, so the same roi can yield a different box for each class.
    if cfg.DO_PARSING:
        _, scores, bbox_pred, rois, mask_score_map = net.test_image(blobs['data'], blobs['im_info'])
    else:
        _, scores, bbox_pred, rois = net.test_image(blobs['data'], blobs['im_info'])

    # Column 0 of rois is skipped; columns 1..4 are scaled back to the
    # original image size.
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas: every roi is shifted by the
        # offset of each class. bbox_transform_inv works on tensors here, so
        # convert in and back out.
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy()
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # No regression at test time: simply repeat the boxes, once per class.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DO_PARSING:
        return scores, pred_boxes, mask_score_map
    return scores, pred_boxes
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top-scoring region proposals without non-maximal suppression.

    For details please see the technical report.

    Returns:
        (blob, scores): ``blob`` is a zero batch-index column concatenated
        with the clipped proposals; ``scores`` is the matching score column.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view(-1, 4)
    fg_scores = fg_scores.contiguous().view(-1, 1)

    length = fg_scores.size(0)
    if length >= rpn_top_n:
        # Indices of the rpn_top_n highest scores, flattened to 1-D.
        ranked = fg_scores.sort(0, descending=True)[1]
        top_inds = ranked[:rpn_top_n].view(rpn_top_n)
    else:
        # Fewer candidates than requested: sample with replacement.
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens.
        sampled = npr.choice(length, size=rpn_top_n, replace=True)
        top_inds = torch.from_numpy(sampled).long().to(anchors.device)

    # Do the selection here
    anchors = anchors[top_inds, :].contiguous()
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous()
    fg_scores = fg_scores[top_inds].contiguous()

    # Convert anchors into proposals via bbox transformations, then clip the
    # predicted boxes to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, rpn_bbox_pred), im_info[:2])

    # Output rois blob.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat([batch_inds, proposals], 1)
    return blob, fg_scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top-scoring region proposals without non-maximal suppression.

    For details please see the technical report.

    Returns:
        (blob, scores): ``blob`` is a zero batch-index column followed by the
        clipped proposals (float32); ``scores`` is the matching score column.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    im_info = im_info[0]

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    fg_scores = fg_scores.reshape((-1, 1))

    length = fg_scores.shape[0]
    if length >= rpn_top_n:
        # Ascending argsort reversed gives highest first; keep rpn_top_n and
        # flatten to 1-D.
        ranked = fg_scores.argsort(0)[::-1]
        top_inds = ranked[:rpn_top_n].reshape(rpn_top_n, )
    else:
        # Fewer candidates than requested: sample with replacement.
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens.
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    fg_scores = fg_scores[top_inds]

    # Convert anchors into proposals via bbox transformations, then clip the
    # predicted boxes to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, rpn_bbox_pred), im_info[:2])

    # Output rois blob.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, fg_scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into a ranked, NMS-filtered set of proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Only element 0 of ``im_info`` is used, and only its first two entries
    (for clipping).

    Returns:
        (blob, scores): ``blob`` is a zero batch-index column followed by the
        proposal coordinates (float32); ``scores`` is the matching score column.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    pre_nms_topN = section.RPN_PRE_NMS_TOP_N
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH

    im_info = im_info[0]

    # Foreground scores are the channels from num_anchors onward.
    fg_scores = rpn_cls_prob[:, :, :, num_anchors:]
    deltas = rpn_bbox_pred.reshape((-1, 4))
    fg_scores = fg_scores.reshape((-1, 1))
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Highest score first, optionally truncated before NMS.
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # Non-maximal suppression on rows laid out as (box, score).
    keep = nms(np.hstack((proposals, fg_scores)), nms_thresh)

    # Keep the top proposals after NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    fg_scores = fg_scores[keep]

    # Only a single input image is supported, so every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, fg_scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into a ranked, NMS-filtered set of proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D tensor of RPN class probabilities; the channels from
            ``num_anchors`` onward are used as the foreground scores.
        rpn_bbox_pred: box regression deltas, viewed here as rows of 4.
        im_info: image info; only its first two entries are used for clipping.
        cfg_key: config section name, str or bytes.
        _feat_stride: unused.
        anchors: anchor boxes passed to ``bbox_transform_inv``.
        num_anchors: number of anchors per feature-map location.

    Returns:
        (blob, scores): ``blob`` is a zero batch-index column concatenated
        with the kept proposals; ``scores`` is an (N, 1) column of their
        scores.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally: the sort above flattens the
    # scores, and ``squeeze(1)`` below needs a 2-D tensor even when
    # pre_nms_topN disables the truncation.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression; this nms takes boxes and 1-D scores separately.
    keep = nms(proposals, scores.squeeze(1), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def im_detect(sess, net, im):
    """Run the network on one image and return class scores and boxes.

    Args:
        sess: session handed through to ``net.test_image``.
        net: network object providing ``test_image(sess, data, im_info)``.
        im: input image array; its ``shape`` is used to clip the boxes.

    Returns:
        (scores, pred_boxes): the scores as returned by the network, and
        either the regressed, clipped boxes or the rois repeated once per
        score column when ``cfg.TEST.BBOX_REG`` is off.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    # im_info is a single row built from two dimensions of the data blob and
    # the image scale (the original author reads them as height, width, scale).
    im_blob = blobs['data']
    info_row = [im_blob.shape[1], im_blob.shape[2], im_scales[0]]
    blobs['im_info'] = np.array([info_row], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # Column 0 of rois is skipped; columns 1..4 are scaled back to the
    # original image size.
    boxes = rois[:, 1:5] / im_scales[0]

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas, then clip to the image.
    pred_boxes = _clip_boxes(bbox_transform_inv(boxes, bbox_pred), im.shape)
    return scores, pred_boxes