예제 #1
    def get_boxes(self, name, phase):
        """Build image-clipped boxes from predictions or proposal targets.

        The excerpt this method came from had lost both ``else:`` lines, so
        the decoded boxes were always overwritten and ``final_boxes`` was
        unbound whenever ``phase != 'pred'``.  The branches are restored here
        (the original indentation shows where they belonged).

        Args:
            name: ``'bbox'`` decodes the regression deltas onto the RoIs; any
                other value returns the rescaled RoIs themselves.
            phase: ``'pred'`` reads ``self._predictions``; any other value
                reads ``self._proposal_targets``.

        Returns:
            The selected boxes after ``clip_boxes_tf(..., self._im_info)``.
        """
        if phase == 'pred':
            box_deltas = self._predictions['bbox_pred']
            rois = self._predictions['rois']
        else:
            box_deltas = self._proposal_targets['bbox_targets']
            rois = self._proposal_targets['rois']

        # Columns 1..4 of the RoIs are the box coordinates; they are divided
        # by the first value that get_imscale derives from im_info.
        im_scales = tf.py_func(get_imscale, [self._im_info], tf.float32)
        boxes = rois[:, 1:5] / im_scales[0]

        # Flatten the deltas to one row per RoI.  Done unconditionally, as in
        # the original, even when the deltas end up unused.
        box_deltas = tf.reshape(box_deltas, [box_deltas.shape[0], -1])

        if name == 'bbox':
            boxes = bbox_transform_inv_tf(boxes, box_deltas)
        final_boxes = clip_boxes_tf(boxes, self._im_info)

        return final_boxes
예제 #2
    def _rep_gt_loss(self, bbox_pred, bbox_second_targets, mask):
        """Compute a loss from the IoG of decoded predictions vs. second targets.

        Both ``bbox_pred`` and ``bbox_second_targets`` are de-normalized with
        the configured stds/means, decoded against the same RoIs
        (``self._predictions['rois'][:, 1:5]``), filtered by ``mask`` and
        reshaped to ``[-1, 4]`` before being passed to ``IoG``.

        Args:
            bbox_pred: predicted box deltas (normalized).
            bbox_second_targets: second-target box deltas (normalized).
            mask: boolean mask applied to both decoded box sets.

        Returns:
            ``(rep_gt_loss, iog)`` where ``rep_gt_loss`` is ``reduce_mean(iog)``.
        """
        # NOTE(review): the two np.tile(...) calls below are cut off in this
        # excerpt -- the repetition argument and the closing parenthesis are
        # missing.  Restore them from the upstream source before using this.
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
        # Undo the target normalization: deltas * std + mean.
        bbox_deltas = bbox_pred * stds
        bbox_deltas += means
        # Decode the predicted deltas onto the RoI coordinates (columns 1..4).
        boxes1 = tf.identity(self._predictions['rois'][:, 1:5])
        decoded_boxes = bbox_transform_inv_tf(boxes1, bbox_deltas)
        decoded_boxes = tf.boolean_mask(decoded_boxes, mask)
        decoded_boxes = tf.reshape(decoded_boxes, [-1, 4])
        # decoded_boxes = clip_boxes_tf(decoded_boxes, self._im_info[:2])

        # Same de-normalize / decode / mask / reshape steps for the second targets.
        boxes2 = tf.identity(self._predictions['rois'][:, 1:5])
        bbox_second_targets_deltas = bbox_second_targets * stds
        bbox_second_targets_deltas += means
        decoded_second_target_boxes = bbox_transform_inv_tf(
            boxes2, bbox_second_targets_deltas)
        decoded_second_target_boxes = tf.boolean_mask(
            decoded_second_target_boxes, mask)
        decoded_second_target_boxes = tf.reshape(decoded_second_target_boxes,
                                                 [-1, 4])
        # decoded_second_target_boxes = clip_boxes_tf(decoded_second_target_boxes, self._im_info[:2])
        # IoG returns two values; only the first is used here.
        # NOTE(review): presumably intersection-over-ground-truth -- confirm in IoG.
        iog, I = IoG(decoded_second_target_boxes, decoded_boxes)
        # rep_gt_loss = -tf.reduce_mean(tf.log(1.0 - iog + 1e-10))
        rep_gt_loss = tf.reduce_mean(iog)
        return rep_gt_loss, iog
예제 #3
def proposal_top_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                          anchors, num_anchors):
    """A layer that just selects the top region proposals
    without using non-maximal suppression.

    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; its first two entries bound the clipping.
        _feat_stride: unused in this function.
        anchors: anchor boxes, gathered with the same indices as the deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, top_scores)``: ``blob`` is a zero column concatenated with
        the clipped proposals; ``top_scores`` has shape ``(-1, 1)``.
    """
    # The closing quotes of the docstring were missing in the original
    # excerpt, which turned the whole body into string content.
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    scores = tf.reshape(scores, shape=(-1, ))

    # Do the selection here
    top_scores, top_inds = tf.nn.top_k(scores, k=rpn_top_n)
    top_scores = tf.reshape(top_scores, shape=(-1, 1))
    top_anchors = tf.gather(anchors, top_inds)
    top_rpn_bbox = tf.gather(rpn_bbox_pred, top_inds)
    proposals = bbox_transform_inv_tf(top_anchors, top_rpn_bbox)

    # Clip predicted boxes to image
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    proposals = tf.to_float(proposals)
    batch_inds = tf.zeros((rpn_top_n, 1))
    blob = tf.concat([batch_inds, proposals], 1)
    return blob, top_scores
예제 #4
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; its first two entries bound the clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero column concatenated with the
        kept boxes; ``scores`` has shape ``(-1, 1)``.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression.  The call was cut off after its first
    # argument in the original excerpt; the remaining arguments are restored.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
예제 #5
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
  """A layer that just selects the top region proposals
  without using non-maximal suppression.

  For details please see the technical report.

  Args:
    rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
      ``num_anchors`` onward are used as scores.
    rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
    im_info: image info; its first two entries bound the clipping.
    _feat_stride: unused in this function.
    anchors: anchor boxes, gathered with the same indices as the deltas.
    num_anchors: channel offset at which the scores start.

  Returns:
    ``(blob, top_scores)``: ``blob`` is a zero column concatenated with the
    clipped proposals; ``top_scores`` has shape ``(-1, 1)``.
  """
  # The docstring was left unterminated in the original excerpt; it is
  # closed above so the statements below are executable code again.
  rpn_top_n = cfg.TEST.RPN_TOP_N

  scores = rpn_cls_prob[:, :, :, num_anchors:]
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
  scores = tf.reshape(scores, shape=(-1,))

  # Do the selection here
  top_scores, top_inds = tf.nn.top_k(scores, k=rpn_top_n)
  top_scores = tf.reshape(top_scores, shape=(-1, 1))
  top_anchors = tf.gather(anchors, top_inds)
  top_rpn_bbox = tf.gather(rpn_bbox_pred, top_inds)
  proposals = bbox_transform_inv_tf(top_anchors, top_rpn_bbox)

  # Clip predicted boxes to image
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Output rois blob
  # Our RPN implementation only supports a single input image, so all
  # batch inds are 0
  proposals = tf.to_float(proposals)
  batch_inds = tf.zeros((rpn_top_n, 1))
  blob = tf.concat([batch_inds, proposals], 1)
  return blob, top_scores
예제 #6
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Produce NMS-filtered proposals and their scores from RPN outputs.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero column concatenated with the
        kept boxes; ``scores`` has shape ``(-1, 1)``.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # shape=(-1,) flattens all elements into a single 1-D vector.
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # im_info[:2] takes the first two entries of im_info.  clip_boxes_tf
    # crops the proposals so every box lies inside the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression.  The call was truncated in the original
    # excerpt; the missing arguments are restored.
    # `indices` is a 1-D vector holding the indices of the proposals kept.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Use those indices to pick the surviving boxes out of `proposals`.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
예제 #7
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """Decode RPN outputs into clipped, NMS-filtered region proposals.

  Args:
    rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
      ``num_anchors`` onward are used as scores.
    rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
    im_info: image info; only ``im_info[:2]`` is used, for clipping.
    cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
    _feat_stride: unused in this function.
    anchors: anchor boxes matching the rows of the reshaped deltas.
    num_anchors: channel offset at which the scores start.

  Returns:
    ``(blob, scores)``: ``blob`` is a zero column concatenated with the kept
    boxes; ``scores`` has shape ``(-1, 1)``.
  """
  # isinstance (rather than an exact type comparison) also decodes bytes
  # subclasses, which an equality check on type() would silently skip.
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  # Read from the config but not used by the TF NMS op below.
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = tf.reshape(scores, shape=(-1,))
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

  proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Non-maximal suppression: keeps at most post_nms_topN box indices.
  indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topN, iou_threshold=nms_thresh)

  boxes = tf.gather(proposals, indices)
  boxes = tf.to_float(boxes)
  scores = tf.gather(scores, indices)
  scores = tf.reshape(scores, shape=(-1, 1))

  # Only support single image as input
  batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
  blob = tf.concat([batch_inds, boxes], 1)

  return blob, scores
예제 #8
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    The original excerpt had trace notes pasted into the body as bare
    statements, which made it unparseable.  They are kept here as
    documentation.  Values the annotator observed in one traced run:

        rpn_cls_prob  = Tensor("vgg_16_1/rpn_cls_prob/transpose_1:0",
                               shape=(1, ?, ?, 18), dtype=float32)
        rpn_bbox_pred = Tensor("vgg_16_1/rpn_bbox_pred/BiasAdd:0",
                               shape=(1, ?, ?, 36), dtype=float32)
        im_info       = Tensor("Placeholder_1:0", shape=(3,), dtype=float32)
        cfg_key       = TRAIN
        _feat_stride  = [16]
        anchors       = Tensor("vgg_16_1/ANCHOR_default/Cast:0",
                               shape=(?, 4), dtype=float32)
        num_anchors   = 9

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero column concatenated with the
        kept boxes; ``scores`` has shape ``(-1, 1)``.
    """
    # Annotator's trace: this branch was taken (cfg_key arrived as bytes).
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # traced value: 12000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # traced value: 2000
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # traced value: 0.7

    # Get the scores and bounding boxes
    # The annotator asked why the slice starts at num_anchors.
    # NOTE(review): presumably the channels from num_anchors onward hold the
    # foreground probabilities -- confirm against the RPN head.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Apply the predicted deltas to the anchors to get the refined boxes.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the refined boxes with the first two entries of im_info.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: returns the indices of the boxes that remain.
    # The call was truncated in the original excerpt; the arguments are
    # restored.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # tf.gather picks rows by index, e.g.
    #   tf.gather([[1,2],[4,5],[6,7],[8,9]], [0,2,3]) -> [[1,2],[6,7],[8,9]]
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # If NMS leaves 2k boxes, `boxes` has shape (2k, 4); prepending the zero
    # batch-index column makes `blob` (2k, 5).
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
예제 #9
 def _return_RPN_info(self):
     """Return RPN-related tensors taken from ``self._predictions``.

     NOTE(review): the return statement is cut off in this excerpt (it ends
     on a line continuation), so the full tuple of returned values is not
     visible here.  ``proposals`` is computed below but does not appear in
     the visible part of the return -- presumably it is one of the missing
     elements; confirm against the upstream source.
     """
     # Imported inside the method rather than at module level.
     from model.bbox_transform import bbox_transform_inv_tf, clip_boxes_tf
     # Flatten the RPN box deltas to one 4-vector per row.
     rpn_bbox_pred = tf.reshape(self._predictions["rpn_bbox_pred"],
                                shape=(-1, 4))
     # Apply the deltas to the anchors, then clip with im_info[:2].
     proposals = bbox_transform_inv_tf(self._anchors, rpn_bbox_pred)
     proposals = clip_boxes_tf(proposals, self._im_info[:2])
     return self._predictions["rpn_cls_score"],\
            self._predictions["rois"], \
예제 #10
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """Decode RPN outputs into clipped, NMS-filtered region proposals.

  Args:
    rpn_cls_prob: 4-D RPN probability tensor (the annotator noted
      shape=(1, ?, ?, 18)); the last-axis channels from ``num_anchors``
      onward are used as scores.
    rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
    im_info: image info; only ``im_info[:2]`` is used, for clipping.
    cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
    _feat_stride: unused in this function.
    anchors: anchor boxes matching the rows of the reshaped deltas.
    num_anchors: channel offset at which the scores start.

  Returns:
    ``(blob, scores)``: ``blob`` is a zero column concatenated with the kept
    boxes; ``scores`` has shape ``(-1, 1)`` and holds their scores.
  """
  # isinstance (rather than an exact type comparison) also decodes bytes
  # subclasses, which an equality check on type() would silently skip.
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  # Read from the config but not used by the TF NMS op below.
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = tf.reshape(scores, shape=(-1,))
  # Box deltas, one 4-vector per row.
  rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

  # rpn_bbox_pred holds the (dx, dy, dh, dw) deltas (annotator's note);
  # proposals are the box corner coordinates after applying those deltas
  # to the anchors.
  proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
  # Clip off whatever extends beyond the image boundary.
  proposals = clip_boxes_tf(proposals, im_info[:2])

  # Non-maximal suppression: boxes are considered in descending score
  # order, and any box whose IoU with an already-kept box exceeds the
  # threshold is dropped.  Returns the indices of the boxes kept.
  indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topN, iou_threshold=nms_thresh)

  # Rows of `proposals` at `indices`, i.e. the selected boxes.
  boxes = tf.gather(proposals, indices)
  boxes = tf.to_float(boxes)
  # Scores belonging to the selected boxes.
  scores = tf.gather(scores, indices)
  scores = tf.reshape(scores, shape=(-1, 1))

  # Only support single image as input
  # batch_inds is an all-zero column (one row per kept box) that is
  # prepended to the box coordinates.  It is not a 1-based row index.
  batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
  blob = tf.concat([batch_inds, boxes], 1)

  return blob, scores
예제 #11
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero column concatenated with the
        kept boxes; ``scores`` has shape ``(-1, 1)``.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read the parameters for the selected (training or testing) section.
    # RPN_PRE_NMS_TOP_N: number of top-scoring regions to keep before NMS
    # (annotator's note).  It is read here but not used by the TF NMS op.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    # Refine the boxes: adjust the anchors by the network's predicted deltas.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Clip the refined boxes so they do not extend past the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression.  The call was truncated in the original
    # excerpt; the missing arguments are restored.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    # Prepend the within-batch index to every kept box.  Only one image per
    # batch is supported, so the index is always 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
예제 #12
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero column concatenated with the
        kept boxes; ``scores`` (shape ``(-1, 1)``) are predicted scores, not
        labels.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Apply the predicted deltas (tx, ty, tw, th) to the anchors to obtain
    # the predicted box coordinates (x1, y1, x2, y2), then clip them.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # There are too many raw proposals to use directly, so non-maximal
    # suppression keeps at most post_nms_topN of them (the annotator notes
    # about 2000 in training).  The call was truncated in the original
    # excerpt; the missing arguments are restored.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    # Gather the boxes and scores that survived NMS.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores
예제 #13
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    Shape remarks in the comments below are the original annotator's and
    assume 9 anchors per location.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: ``blob`` rows are ``[0, xmin, ymin, xmax, ymax]``
        (annotator's layout note); ``scores`` has shape ``(-1, 1)``.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    # Take the foreground scores: [1, h, w, 18] -> [1, h, w, 9].
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # Flatten to a 1-D vector of length w*h*9.
    scores = tf.reshape(scores, shape=(-1, ))
    # [1, w, h, 9*4] -> [w*h*9, 4]
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred,
                               shape=(-1, 4))

    # Compute the predicted boxes from the predicted offsets.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    # Adjust the boxes so they do not extend past the image.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression: returns at most post_nms_topN indices.
    # The original excerpt had lost the boxes, scores and max_output_size
    # arguments of this call; they are restored.
    indices = tf.image.non_max_suppression(
        proposals,
        scores,
        max_output_size=post_nms_topN,
        iou_threshold=nms_thresh)

    # Select the boxes at the kept indices.
    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    # Select the matching foreground scores, as a column vector.
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    # Rebuild the blob by prepending the zero batch-index column.
    blob = tf.concat([batch_inds, boxes],
                     1)

    return blob, scores
예제 #14
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                      _feat_stride, anchors, num_anchors):
    """Decode RPN outputs into clipped, NMS-filtered region proposals.

    Args:
        rpn_cls_prob: 4-D RPN probability tensor; the last-axis channels from
            ``num_anchors`` onward are used as scores.
        rpn_bbox_pred: RPN box deltas, reshaped here to ``(-1, 4)``.
        im_info: image info; only ``im_info[:2]`` is used, for clipping.
        cfg_key: name of the ``cfg`` section to read (``str`` or ``bytes``).
        _feat_stride: unused in this function.
        anchors: anchor boxes matching the rows of the reshaped deltas.
        num_anchors: channel offset at which the scores start.

    Returns:
        ``(blob, scores)``: the filtered boxes (with a leading zero column)
        and their scores, shaped ``(-1, 1)``.
    """
    # isinstance also accepts bytes subclasses, unlike an exact type check.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Read from the config but not used by the TF NMS op below.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1, ))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))

    # Each anchor is shifted and scaled by its learned offsets (the values
    # in rpn_bbox_pred) to produce the final predicted box.  That is, the
    # original proposal A is transformed into a predicted box G' that should
    # lie close to the ground truth G.
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)

    # Clip off whatever extends beyond the original image border.
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression.  The call was truncated in the original
    # excerpt; the missing arguments are restored.
    indices = tf.image.non_max_suppression(proposals,
                                           scores,
                                           max_output_size=post_nms_topN,
                                           iou_threshold=nms_thresh)

    boxes = tf.gather(proposals, indices)
    boxes = tf.to_float(boxes)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)

    return blob, scores  # the filtered boxes and their scores