def get_boxes(self, name, phase):
    """Build the clipped box tensor for predictions or for proposal targets.

    Args:
        name: when equal to ``'bbox'`` the regression deltas are applied to
            the rescaled ROIs before clipping; for any other value the
            rescaled ROIs are clipped unchanged.
        phase: ``'pred'`` reads ``'bbox_pred'`` and ``'rois'`` from
            ``self._predictions``; any other value reads ``'bbox_targets'``
            and ``'rois'`` from ``self._proposal_targets``.

    Returns:
        The output of ``clip_boxes_tf`` for the selected boxes and
        ``self._im_info``.
    """
    # Both phases run the same pipeline; only the source dict and the key
    # holding the regression deltas differ.
    if phase == 'pred':
        source, delta_key = self._predictions, 'bbox_pred'
    else:
        source, delta_key = self._proposal_targets, 'bbox_targets'

    deltas = source[delta_key]
    rois = source['rois']

    # get_imscale runs as a Python op on im_info; its first output element
    # is the divisor applied to the ROI coordinates.
    im_scales = tf.py_func(get_imscale, [self._im_info], tf.float32)
    # Columns 1..4 of each ROI are the box coordinates; column 0 is dropped.
    boxes = rois[:, 1:5] / im_scales[0]

    # Flatten everything after the first axis. The reshape is built even when
    # the deltas end up unused, exactly as before.
    deltas = tf.reshape(deltas, [deltas.shape[0], -1])

    if name == 'bbox':
        boxes = bbox_transform_inv_tf(boxes, deltas)
    return clip_boxes_tf(boxes, self._im_info)
def _rep_gt_loss(self, bbox_pred, bbox_second_targets, mask):
    """Compute the mean IoG between predicted boxes and second-target boxes.

    Both delta tensors are de-normalized with
    ``cfg.TRAIN.BBOX_NORMALIZE_STDS`` / ``BBOX_NORMALIZE_MEANS`` (tiled
    ``self._num_classes`` times), decoded against columns 1..4 of
    ``self._predictions['rois']``, filtered with ``mask`` and reshaped to
    rows of four values before being handed to ``IoG``.

    Args:
        bbox_pred: normalized predicted box deltas.
        bbox_second_targets: normalized deltas of the second targets.
        mask: boolean mask passed to ``tf.boolean_mask`` for both decoded
            box tensors.

    Returns:
        ``(rep_gt_loss, iog)`` where ``rep_gt_loss`` is ``tf.reduce_mean(iog)``
        and ``iog`` is the first value returned by ``IoG``.
    """
    norm_stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                        self._num_classes)
    norm_means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                         self._num_classes)

    # Predicted boxes: undo normalization, decode, then keep masked entries.
    pred_deltas = bbox_pred * norm_stds + norm_means
    pred_rois = tf.identity(self._predictions['rois'][:, 1:5])
    pred_boxes = bbox_transform_inv_tf(pred_rois, pred_deltas)
    pred_boxes = tf.reshape(tf.boolean_mask(pred_boxes, mask), [-1, 4])
    # NOTE: the decoded boxes are not clipped to the image here.

    # Second-target boxes go through the same steps on the same ROIs.
    target_rois = tf.identity(self._predictions['rois'][:, 1:5])
    target_deltas = bbox_second_targets * norm_stds + norm_means
    target_boxes = bbox_transform_inv_tf(target_rois, target_deltas)
    target_boxes = tf.reshape(tf.boolean_mask(target_boxes, mask), [-1, 4])

    # Only the first output of IoG is used; the second one is discarded.
    iog, _ = IoG(target_boxes, pred_boxes)

    # The loss is the plain mean of IoG (no -log(1 - iog) transform).
    return tf.reduce_mean(iog), iog
def proposal_top_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info,
                          _feat_stride, anchors, num_anchors):
    """Select the top-scoring region proposals without non-maximal suppression.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; the channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to rows of four values.
        im_info: image info; its first two entries are passed to
            ``clip_boxes_tf``.
        _feat_stride: unused.
        anchors: anchor boxes, gathered with the top-k indices.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, top_scores)``: ``blob`` has a leading zero batch-index column
        followed by the four box coordinates for each of the
        ``cfg.TEST.RPN_TOP_N`` proposals; ``top_scores`` is a column of the
        matching scores.
    """
    top_n = cfg.TEST.RPN_TOP_N

    score_map = rpn_cls_prob[:, :, :, num_anchors:]
    flat_deltas = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    flat_scores = tf.reshape(score_map, shape=(-1, ))

    # Keep the top_n best-scoring entries and their anchors/deltas.
    best_scores, best_idx = tf.nn.top_k(flat_scores, k=top_n)
    best_scores = tf.reshape(best_scores, shape=(-1, 1))
    best_anchors = tf.gather(anchors, best_idx)
    best_deltas = tf.gather(flat_deltas, best_idx)

    # Decode the deltas against their anchors, then clip to the image.
    rois = bbox_transform_inv_tf(best_anchors, best_deltas)
    rois = clip_boxes_tf(rois, im_info[:2])
    rois = tf.to_float(rois)

    # Only a single input image is supported, so every batch index is 0.
    zero_batch_col = tf.zeros((top_n, 1))
    return tf.concat([zero_batch_col, rois], 1), best_scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into NMS-filtered region proposals.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; the channels from
            ``num_anchors`` onward are used as scores (presumably the
            foreground probabilities -- confirm against the RPN head).
        rpn_bbox_pred: RPN box deltas, reshaped here to rows of four values.
        im_info: image info; its first two entries are passed to
            ``clip_boxes_tf``.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` has a leading zero batch-index column
        followed by the four coordinates of each kept box; ``scores`` is a
        column of the matching scores.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: cfg[cfg_key].RPN_PRE_NMS_TOP_N is not applied by this
    # implementation; NMS runs on every proposal.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """Return the ``cfg.TEST.RPN_TOP_N`` best-scoring proposals, skipping NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; scores are read from
            the channels starting at ``num_anchors``.
        rpn_bbox_pred: RPN box deltas (flattened to rows of four values).
        im_info: image info; ``im_info[:2]`` is handed to ``clip_boxes_tf``.
        _feat_stride: unused.
        anchors: anchor boxes indexed by the selected positions.
        num_anchors: first channel of ``rpn_cls_prob`` treated as a score.

    Returns:
        A pair ``(blob, top_scores)``. Each row of ``blob`` is a zero batch
        index followed by four box coordinates; ``top_scores`` holds the
        selected scores as a single column.
    """
    keep_count = cfg.TEST.RPN_TOP_N

    anchor_scores = rpn_cls_prob[:, :, :, num_anchors:]
    deltas = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    anchor_scores = tf.reshape(anchor_scores, shape=(-1,))

    # Rank all anchors by score and keep the best keep_count of them.
    kept_scores, kept_positions = tf.nn.top_k(anchor_scores, k=keep_count)
    kept_scores = tf.reshape(kept_scores, shape=(-1, 1))

    kept_anchors = tf.gather(anchors, kept_positions)
    kept_deltas = tf.gather(deltas, kept_positions)

    # Apply the deltas to the kept anchors and clip the result to the image.
    decoded = bbox_transform_inv_tf(kept_anchors, kept_deltas)
    clipped = clip_boxes_tf(decoded, im_info[:2])

    # Output ROI blob: single-image input only, hence an all-zero batch
    # index column in front of the coordinates.
    coords = tf.to_float(clipped)
    batch_column = tf.zeros((keep_count, 1))
    roi_blob = tf.concat([batch_column, coords], 1)

    return roi_blob, kept_scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into proposals and filter them with NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped to rows of four values.
        im_info: image info; only its first two entries are used.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one per row of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed by
        the kept boxes; ``scores`` is a column of their scores.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is never applied here, so it is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # shape=(-1,) flattens every element into a single 1-D vector.
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # im_info[:2] selects the first two entries of im_info. clip_boxes_tf is
    # expected to trim the proposals so that every box lies inside the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    # indices is a 1-D vector with the positions of the selected proposals.
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Use those positions to pick the boxes (and scores) that are kept.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors):
    """Produce NMS-filtered region proposals from the RPN outputs.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped to rows of four values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes_tf``.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the deltas are applied to.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: rows of ``[0, box...]`` for the kept proposals
        and a column with their scores.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    section = cfg[cfg_key]
    # NOTE: section.RPN_PRE_NMS_TOP_N is not used; no pre-NMS trimming occurs.
    post_nms_topN = section.RPN_POST_NMS_TOP_N
    nms_thresh = section.RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1,))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.to_float(tf.gather(proposals, indices))
    scores = tf.reshape(tf.gather(scores, indices), shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into proposals and keep the NMS survivors.

    Example argument values recorded by the original author (VGG16 run):

        rpn_cls_prob  = Tensor("vgg_16_1/rpn_cls_prob/transpose_1:0",
                               shape=(1, ?, ?, 18), dtype=float32)
        rpn_bbox_pred = Tensor("vgg_16_1/rpn_bbox_pred/BiasAdd:0",
                               shape=(1, ?, ?, 36), dtype=float32)
        im_info       = Tensor("Placeholder_1:0", shape=(3,), dtype=float32)
        cfg_key       = TRAIN
        _feat_stride  = [16]
        anchors       = Tensor("vgg_16_1/ANCHOR_default/Cast:0",
                               shape=(?, 4), dtype=float32)
        num_anchors   = 9

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor.
        rpn_bbox_pred: RPN box deltas, reshaped here to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the deltas are applied to.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` holds one row per kept box with a leading
        zero batch index; ``scores`` is a column with the matching scores.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N (12000 in the author's run) is not applied by
    # this implementation, so it is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000 in the author's run
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # 0.7 in the author's run

    # Get the scores and bounding boxes
    # Only the channels from num_anchors onward are kept -- presumably the
    # foreground probabilities; TODO confirm against the RPN head layout.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Boxes after applying the predicted corrections to the anchors.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the predicted boxes using the first two entries of im_info.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: indices of the boxes that survive it.
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Fetch the boxes for the selected indices, e.g.
    #   a = tf.gather([[1,2],[4,5],[6,7],[8,9]], [0,2,3])
    #   a.eval()  ->  [[1,2],[6,7],[8,9]]
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # If NMS keeps 2k boxes, boxes has shape (2k, 4). The statements below
    # prepend a 0 to every row, giving (2k, 5), e.g.
    #   [[1,2,3,4],[5,6,7,8]]  ->  [[0,1,2,3,4],[0,5,6,7,8]]
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def _return_RPN_info(self):
    """Collect the RPN outputs together with decoded, clipped proposals.

    Returns:
        An 8-tuple: the ``self._predictions`` entries ``rpn_cls_score``,
        ``rpn_cls_score_reshape``, ``rpn_cls_prob``, ``rpn_cls_pred``,
        ``rpn_bbox_pred``, ``rois`` and ``top_rpn_scores`` (in that order),
        followed by the proposals obtained by applying the RPN deltas to
        ``self._anchors`` and clipping with ``self._im_info[:2]``.
    """
    from model.bbox_transform import bbox_transform_inv_tf, clip_boxes_tf

    # Flatten the deltas to rows of four values, decode, then clip.
    flat_deltas = tf.reshape(self._predictions["rpn_bbox_pred"], shape=(-1, 4))
    decoded = bbox_transform_inv_tf(self._anchors, flat_deltas)
    clipped = clip_boxes_tf(decoded, self._im_info[:2])

    # Prediction entries exported to the caller, in output order.
    exported_keys = (
        "rpn_cls_score",
        "rpn_cls_score_reshape",
        "rpn_cls_prob",
        "rpn_cls_pred",
        "rpn_bbox_pred",
        "rois",
        "top_rpn_scores",
    )
    rpn_outputs = tuple(self._predictions[key] for key in exported_keys)
    return rpn_outputs + (clipped,)
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Build region proposals from RPN outputs and filter them with NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor (the original author
            notes shape ``(1, ?, ?, 18)``); channels from ``num_anchors``
            onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the deltas are applied to.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` holds the kept box coordinates with a
        leading zero batch-index column; ``scores`` holds the score of each
        kept box as a column.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is not applied by this implementation.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1,))

    # bbox: rpn_bbox_pred holds the per-anchor regression deltas (the
    # original note lists them as dx, dy, dh, dw -- confirm the order).
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    # proposals are the box corner coordinates after applying the deltas.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Cut off the parts that extend beyond the image boundary.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: walking the boxes in descending score order,
    # drop boxes whose IoU with an already selected box exceeds the
    # threshold. The result is the indices of the boxes that are kept.
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # boxes are the selected proposals.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    # scores are the scores of the selected boxes.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # Concatenate batch_inds and boxes: the leading column is all zeros and
    # acts as the batch index of each box.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors):
    """Refine anchors with the RPN deltas and keep the NMS survivors.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` selecting the training or testing
            parameters (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes to refine.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: the kept regions with a leading zero batch-index
        column, and a column with their scores.
    """
    # Convert a bytes key to str.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    # Read the parameters for training or testing:
    #   RPN_POST_NMS_TOP_N: number of top-scoring regions kept after NMS.
    #   RPN_NMS_THRESH: NMS overlap threshold.
    # RPN_PRE_NMS_TOP_N (regions kept before NMS) is not applied by this
    # implementation, so it is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Box refinement: adjust the anchors according to the network prediction.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the refined boxes so they do not cross the image border.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Regions that survive NMS.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    # Scores of the regions that survive NMS.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # Prepend the in-batch index to every box; since only one image per
    # batch is supported, the index is always 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Generate region proposals from RPN outputs using NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the deltas are applied to.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: the kept proposals with a leading zero
        batch-index column, and their predicted scores (not labels) as a
        column.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is not applied by this implementation.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Apply the predicted deltas to the anchors to obtain the predicted box
    # coordinates, then clip them using the first two entries of im_info.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: there are too many raw proposals to use them
    # all, so at most post_nms_topN of them are kept.
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Proposals and scores that survive NMS. The scores are the predicted
    # scores, not labels.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Compute predicted boxes from the predicted offsets and filter by NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are taken as the foreground scores.
        rpn_bbox_pred: RPN box offsets, reshaped to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the offsets are applied to.
        num_anchors: channel offset at which the foreground scores start.

    Returns:
        ``(blob, scores)``: ``blob`` rows are
        ``[0, xmin, ymin, xmax, ymax]`` per the original author's note;
        ``scores`` is a column with the score of each kept box.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is not applied by this implementation.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    # Take the foreground scores (author's note: [1, h, w, 18] -> [1, h, w, 9]).
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # Flatten to one score per anchor (w * h * 9 values in the author's note).
    scores = tf.reshape(scores, shape=(-1, ))
    # Author's note: [1, w, h, 9 * 4] -> [w * h * 9, 4].
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Compute the predicted boxes from the predicted offsets.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Adjust the boxes so that they do not exceed the image boundary.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: indices of at most post_nms_topN kept boxes
    # (2000 in the author's note).
    indices = tf.image.non_max_suppression(
        proposals,
        scores,
        max_output_size=post_nms_topN,
        iou_threshold=nms_thresh)

    # Boxes at the selected indices.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    # Foreground scores at the selected indices, as a single column.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    # Rebuild the blob as [0, xmin, ymin, xmax, ymax] rows.
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Shift and scale anchors by the RPN offsets, then filter with NMS.

    Args:
        rpn_cls_prob: 4-D RPN class-probability tensor; channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: learned RPN offsets, reshaped to rows of four values.
        im_info: image info; its first two entries are used for clipping.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes the offsets are applied to.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: the filtered windows (with a leading zero
        batch-index column) and their scores as a column.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is not applied by this implementation.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Each anchor is translated and scaled by its learned offsets
    # (rpn_bbox_pred) to obtain the final predicted box: the original
    # proposal A becomes a predicted box G' meant to be close to the
    # ground-truth box G.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Cut off the parts that extend beyond the original image border.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals, scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    # Output the filtered windows and their scores.
    return blob, scores