Example #1
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Build RPN proposals with TensorFlow ops and non-maximal suppression.

    The predicted deltas are applied to ``anchors``, the resulting boxes are
    clipped using ``im_info[:2]`` and then filtered with
    ``tf.image.non_max_suppression``.

    Args:
        rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward
            are used as the per-anchor scores (presumably the foreground
            probabilities -- confirm against the RPN head).
        rpn_bbox_pred: Box regression output; flattened to ``(-1, 4)`` here.
        im_info: Indexable whose first two entries are handed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: Key into ``cfg``; either ``str`` or UTF-8 encoded ``bytes``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        A ``(blob, scores)`` tuple. ``blob`` is the kept boxes with a column
        of zeros (the batch index) prepended; ``scores`` is the kept scores
        reshaped to ``(-1, 1)``.
    """
    # The key may arrive as bytes; decode it before indexing cfg.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: no pre-NMS top-N filtering happens here, so only the post-NMS
    # limit and the IoU threshold are read from the config.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input, so every batch index is 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
Example #2
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode, clip and NMS-filter the RPN proposals using TensorFlow ops.

    Args:
        rpn_cls_prob: 4-D tensor; the slice ``[..., num_anchors:]`` supplies
            the scores (presumably foreground probabilities -- TODO confirm).
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries are passed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index from which the scores are taken.

    Returns:
        ``(blob, scores)``: ``blob`` holds a zero batch-index column followed
        by the kept boxes; ``scores`` holds the kept scores as ``(-1, 1)``.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: this function does not apply any pre-NMS top-N cut, so
    # RPN_PRE_NMS_TOP_N is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # shape=(-1,) flattens all elements into a single 1-D vector.
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # im_info[:2] selects entries 0 and 1. clip_boxes_tf is expected to clip
    # the proposals so every box lies inside the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: `indices` is a 1-D vector with the positions
    # of the proposals that were selected.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Look up the boxes to keep via the indices above.
    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
  """Select the top-scoring region proposals without non-maximal suppression.

  For details please see the technical report.

  Args:
    rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward are
      used as scores (presumably foreground probabilities -- TODO confirm).
    rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
    im_info: Indexable; its first two entries are passed to ``clip_boxes_tf``
      (presumably image height and width).
    _feat_stride: Not used by this function.
    anchors: Anchor boxes; rows are gathered with the top-score indices.
    num_anchors: Channel index at which the scores start.

  Returns:
    ``(blob, top_scores)``: ``blob`` is the selected proposals with a column
    of zeros (the batch index) prepended; ``top_scores`` is the selected
    scores reshaped to ``(-1, 1)``.
  """
  rpn_top_n = cfg.TEST.RPN_TOP_N

  scores = rpn_cls_prob[:, :, :, num_anchors:]
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
  scores = tf.reshape(scores, shape=(-1,))

  # Do the selection here
  # NOTE(review): tf.nn.top_k needs at least rpn_top_n scores; confirm that
  # callers always provide that many anchors.
  top_scores, top_inds = tf.nn.top_k(scores, k=rpn_top_n)
  top_scores = tf.reshape(top_scores, shape=(-1, 1))
  top_anchors = tf.gather(anchors, top_inds)
  top_rpn_bbox = tf.gather(rpn_bbox_pred, top_inds)
  proposals = bbox_transform_inv_tf(top_anchors, top_rpn_bbox)

  # Clip predicted boxes to image
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Output rois blob
  # Our RPN implementation only supports a single input image, so all
  # batch inds are 0
  # tf.to_float is deprecated; tf.cast is the equivalent replacement.
  proposals = tf.cast(proposals, tf.float32)
  batch_inds = tf.zeros((rpn_top_n, 1))
  blob = tf.concat([batch_inds, proposals], 1)
  return blob, top_scores
def proposal_top_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
  """Pick the ``cfg.TEST.RPN_TOP_N`` best-scoring proposals, skipping NMS.

  For details please see the technical report.

  Args:
    rpn_cls_prob: 4-D tensor; the slice ``[..., num_anchors:]`` supplies the
      scores (presumably foreground probabilities -- TODO confirm).
    rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
    im_info: Indexable; its first two entries are passed to ``clip_boxes_tf``
      (presumably image height and width).
    _feat_stride: Not used by this function.
    anchors: Anchor boxes; rows are gathered with the top-score indices.
    num_anchors: Channel index from which the scores are taken.

  Returns:
    ``(blob, top_scores)``: ``blob`` is a zero batch-index column followed by
    the selected proposals; ``top_scores`` is the selected scores as
    ``(-1, 1)``.
  """
  rpn_top_n = cfg.TEST.RPN_TOP_N

  scores = rpn_cls_prob[:, :, :, num_anchors:]
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
  scores = tf.reshape(scores, shape=(-1,))

  # Do the selection here
  # NOTE(review): tf.nn.top_k requires at least rpn_top_n entries in
  # `scores`; verify that the caller guarantees this.
  top_scores, top_inds = tf.nn.top_k(scores, k=rpn_top_n)
  top_scores = tf.reshape(top_scores, shape=(-1, 1))
  top_anchors = tf.gather(anchors, top_inds)
  top_rpn_bbox = tf.gather(rpn_bbox_pred, top_inds)
  proposals = bbox_transform_inv_tf(top_anchors, top_rpn_bbox)

  # Clip predicted boxes to image
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Output rois blob
  # Our RPN implementation only supports a single input image, so all
  # batch inds are 0
  # tf.to_float is deprecated; tf.cast is the equivalent replacement.
  proposals = tf.cast(proposals, tf.float32)
  batch_inds = tf.zeros((rpn_top_n, 1))
  blob = tf.concat([batch_inds, proposals], 1)
  return blob, top_scores
Example #5
0
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """Turn RPN outputs into NMS-filtered proposals using TensorFlow ops.

  Args:
    rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward are
      used as scores (presumably foreground probabilities -- TODO confirm).
    rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
    im_info: Indexable; its first two entries are passed to ``clip_boxes_tf``
      (presumably image height and width).
    cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
    _feat_stride: Not used by this function.
    anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
    num_anchors: Channel index at which the scores start.

  Returns:
    ``(blob, scores)``: ``blob`` is the kept boxes with a zero batch-index
    column prepended; ``scores`` is the kept scores reshaped to ``(-1, 1)``.
  """
  # The key may arrive as bytes; decode it before indexing cfg.
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  # NOTE: no pre-NMS top-N filtering is performed here, so only the post-NMS
  # limit and the IoU threshold are needed.
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = tf.reshape(scores, shape=(-1,))
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

  proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Non-maximal suppression
  indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topN, iou_threshold=nms_thresh)

  boxes = tf.gather(proposals, indices)
  # tf.to_float is deprecated; tf.cast is the equivalent replacement.
  boxes = tf.cast(boxes, tf.float32)
  scores = tf.gather(scores, indices)
  scores = tf.reshape(scores, shape=(-1, 1))

  # Only support single image as input, so every batch index is 0.
  batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
  blob = tf.concat([batch_inds, boxes], 1)

  return blob, scores
Example #6
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode, clip and NMS-filter the RPN proposals using TensorFlow ops.

    Argument values observed in one traced run (kept from the original notes;
    other configurations may differ):

      rpn_cls_prob = Tensor("vgg_16_1/rpn_cls_prob/transpose_1:0", shape=(1, ?, ?, 18), dtype=float32)
      rpn_bbox_pred = Tensor("vgg_16_1/rpn_bbox_pred/BiasAdd:0", shape=(1, ?, ?, 36), dtype=float32)
      im_info = Tensor("Placeholder_1:0", shape=(3,), dtype=float32)
      cfg_key = TRAIN
      _feat_stride = [16]
      anchors = Tensor("vgg_16_1/ANCHOR_default/Cast:0", shape=(?, 4), dtype=float32)
      num_anchors = 9

    Args:
        rpn_cls_prob: 4-D tensor; channels from ``num_anchors`` onward are
            used as the scores.
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries go to ``clip_boxes_tf``.
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is the kept boxes with a zero
        batch-index column prepended; ``scores`` is the kept scores reshaped
        to ``(-1, 1)``.
    """
    # The original notes record cfg_key arriving as bytes in the traced run.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: RPN_PRE_NMS_TOP_N is not read because this function applies no
    # pre-NMS top-N cut.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000 in the traced run
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # 0.7 in the traced run

    # Get the scores and bounding boxes.
    # Only the channels from num_anchors onward are kept; presumably the first
    # num_anchors channels are background and the rest foreground
    # probabilities -- TODO confirm against the RPN head.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Apply the predicted deltas to the anchors to get the refined boxes.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the boxes using the first two entries of im_info.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: yields the indices of the boxes that survive.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Use the selected indices to pick the corresponding boxes, e.g.
    #   a = tf.gather([[1,2],[4,5],[6,7],[8,9]], [0,2,3])
    #   a.eval()
    #   >>> [[1,2],[6,7],[8,9]]
    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input.
    # If NMS keeps 2k boxes, `boxes` is (2k, 4). Prepending a zero column
    # makes it (2k, 5), e.g.
    #   [[1,2,3,4],[5,6,7,8]] -> [[0,1,2,3,4],[0,5,6,7,8]]
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
Example #7
0
 def _return_RPN_info(self):
     """Return the stored RPN prediction tensors plus freshly decoded proposals.

     The proposals are computed from ``self._predictions["rpn_bbox_pred"]``
     (reshaped to ``(-1, 4)``), ``self._anchors`` and ``self._im_info[:2]``.

     Returns:
         An 8-tuple, in order: ``rpn_cls_score``, ``rpn_cls_score_reshape``,
         ``rpn_cls_prob``, ``rpn_cls_pred``, ``rpn_bbox_pred``, ``rois`` and
         ``top_rpn_scores`` from ``self._predictions``, followed by the
         clipped proposals.
     """
     from model.bbox_transform import bbox_transform_inv_tf, clip_boxes_tf

     # Flatten the regression output so each row holds one box's four deltas.
     deltas = tf.reshape(self._predictions["rpn_bbox_pred"], shape=(-1, 4))
     decoded = bbox_transform_inv_tf(self._anchors, deltas)
     clipped = clip_boxes_tf(decoded, self._im_info[:2])

     return (
         self._predictions["rpn_cls_score"],
         self._predictions["rpn_cls_score_reshape"],
         self._predictions["rpn_cls_prob"],
         self._predictions["rpn_cls_pred"],
         self._predictions["rpn_bbox_pred"],
         self._predictions["rois"],
         self._predictions["top_rpn_scores"],
         clipped,
     )
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """Produce NMS-filtered RPN proposals with TensorFlow ops.

  Args:
    rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward are
      used as scores (presumably foreground probabilities -- TODO confirm).
    rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
    im_info: Indexable; its first two entries are passed to ``clip_boxes_tf``
      (presumably image height and width).
    cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
    _feat_stride: Not used by this function.
    anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
    num_anchors: Channel index at which the scores start.

  Returns:
    ``(blob, scores)``: ``blob`` is the kept boxes with a zero batch-index
    column prepended; ``scores`` is the kept scores reshaped to ``(-1, 1)``.
  """
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  # NOTE: no pre-NMS top-N filtering is applied in this function, so
  # RPN_PRE_NMS_TOP_N is not read.
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  # (original note: rpn_cls_prob has shape (1, ?, ?, 18))
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = tf.reshape(scores, shape=(-1,))
  # One row of four regression values per box.
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

  # rpn_bbox_pred holds the predicted deltas; presumably bbox_transform_inv_tf
  # returns the corner coordinates of the adjusted boxes -- TODO confirm.
  proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
  # Clip away the parts that fall outside the image boundary.
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Non-maximal suppression: picks a subset of boxes in descending score
  # order, dropping boxes whose IoU with an already selected box exceeds the
  # threshold, and returns the indices of the boxes that are kept.
  indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topN, iou_threshold=nms_thresh)

  # Rows of `proposals` at the selected indices, i.e. the kept boxes.
  boxes = tf.gather(proposals, indices)
  # tf.to_float is deprecated; tf.cast is the equivalent replacement.
  boxes = tf.cast(boxes, tf.float32)
  # Scores belonging to the kept boxes.
  scores = tf.gather(scores, indices)
  scores = tf.reshape(scores, shape=(-1, 1))

  # Only support single image as input
  # The prepended column is the batch index of each box; it is all zeros
  # because a single image is processed.
  batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
  blob = tf.concat([batch_inds, boxes], 1)

  return blob, scores
Example #9
0
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """Turn RPN outputs into NMS-filtered proposals using TensorFlow ops.

    Args:
        rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward
            are used as scores (presumably foreground probabilities -- TODO
            confirm).
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries are passed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting the train or test
            section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is the kept boxes with a zero
        batch-index column prepended; ``scores`` is the kept scores reshaped
        to ``(-1, 1)``.
    """
    # Convert a bytes key to str.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read the parameters for the selected (train or test) configuration:
    #   RPN_POST_NMS_TOP_N: number of top-scoring regions kept after NMS
    #   RPN_NMS_THRESH:     IoU threshold used by NMS
    # NOTE: RPN_PRE_NMS_TOP_N (regions kept before NMS) is not read because
    # this function applies no pre-NMS cut.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    # Refine the boxes: adjust the anchors by the network's predictions.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the refined boxes so they do not extend past the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)
    # Regions that survive NMS.
    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    # Scores that belong to the surviving regions.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # Prepend the within-batch index to every box; it is always 0 because
    # only one image per batch is supported.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
Example #10
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Build RPN proposals with TensorFlow ops and non-maximal suppression.

    Args:
        rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward
            are used as scores (presumably foreground probabilities -- TODO
            confirm).
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries are passed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is the kept boxes with a zero
        batch-index column prepended; ``scores`` is the kept scores reshaped
        to ``(-1, 1)``.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: no pre-NMS top-N filtering is applied here, so RPN_PRE_NMS_TOP_N
    # is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Apply the predicted deltas to the anchors; presumably the result is the
    # predicted box as four coordinates (x1, y1, x2, y2) -- TODO confirm
    # against bbox_transform_inv_tf.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: there are too many raw proposals to use
    # directly, so NMS reduces them to at most post_nms_topN.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Gather the boxes and scores that survived NMS. The scores are the
    # network's predicted scores, not labels.
    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input, so every batch index is 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
Example #11
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Compute predicted boxes from the RPN offsets and filter them with NMS.

    Args:
        rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward
            are used as scores (presumably foreground probabilities -- TODO
            confirm).
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries are passed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is the kept boxes with a zero
        batch-index column prepended; ``scores`` is the kept scores reshaped
        to ``(-1, 1)``.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: no pre-NMS top-N filtering is applied here, so RPN_PRE_NMS_TOP_N
    # is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    # Take the score channels, e.g. [1, h, w, 18] -> [1, h, w, 9] when
    # num_anchors is 9.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # Flatten to 1-D, e.g. [1, h, w, 9] -> [h*w*9].
    scores = tf.reshape(scores, shape=(-1, ))
    # e.g. [1, h, w, 9*4] -> [h*w*9, 4]
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Compute the predicted boxes from the predicted offsets.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Adjust the boxes so they do not extend past the image bounds.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: returns at most post_nms_topN indices.
    indices = tf.image.non_max_suppression(
        proposals,
        scores,
        max_output_size=post_nms_topN,
        iou_threshold=nms_thresh)

    # Boxes at the selected indices.
    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    # Scores at the selected indices, as a column vector.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    # Rebuild the blob with a leading zero batch index per row; presumably
    # each row is [0, xmin, ymin, xmax, ymax] -- TODO confirm box layout.
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
Example #12
0
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode, clip and NMS-filter the RPN proposals using TensorFlow ops.

    Args:
        rpn_cls_prob: 4-D tensor; channels from index ``num_anchors`` onward
            are used as scores (presumably foreground probabilities -- TODO
            confirm).
        rpn_bbox_pred: Box regression output; reshaped to ``(-1, 4)``.
        im_info: Indexable; its first two entries are passed to
            ``clip_boxes_tf`` (presumably image height and width).
        cfg_key: ``str`` or UTF-8 ``bytes`` key selecting a section of ``cfg``.
        _feat_stride: Not used by this function.
        anchors: Anchor boxes passed to ``bbox_transform_inv_tf``.
        num_anchors: Channel index at which the scores start.

    Returns:
        ``(blob, scores)``: the boxes that passed the filtering (with a zero
        batch-index column prepended) and their scores reshaped to
        ``(-1, 1)``.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # NOTE: no pre-NMS top-N filtering is applied here, so RPN_PRE_NMS_TOP_N
    # is not read.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Each anchor is shifted and scaled by its learned offsets in
    # rpn_bbox_pred to obtain the final predicted box. In other words, an
    # original anchor A is mapped to a predicted box G' that is meant to be
    # close to the ground truth G.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)

    # Clip away the parts that extend beyond the original image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    # tf.to_float is deprecated; tf.cast is the equivalent replacement.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input, so every batch index is 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    # Return the filtered boxes together with their scores.
    return blob, scores