def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.num_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                            gt_boxes.astype(np.float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb
Ejemplo n.º 2
0
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
Ejemplo n.º 3
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
  gt_assignment = overlaps.argmax(axis=1)
  max_overlaps = overlaps.max(axis=1)
  labels = gt_boxes[gt_assignment, 4]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
  # Guard against the case when an image has fewer than fg_rois_per_image
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
  if bg_inds.size == 0:
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= 0))[0]

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.size > 0 and bg_inds.size > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
    fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.size < bg_rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
  elif fg_inds.size > 0:
    to_replace = fg_inds.size < rois_per_image
    fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.size > 0:
    to_replace = bg_inds.size < rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = 0
  else:
    # print("bg_inds is 0")
    # bg_inds = np.zeros(shape=[1,],dtype=np.int32)
    import pdb
    pdb.set_trace()

  # The indices that we're selecting (both fg and bg)
  keep_inds = np.append(fg_inds, bg_inds)
  # Select sampled values from various arrays:
  labels = labels[keep_inds]
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds]
  roi_scores = all_scores[keep_inds]

  bbox_target_data = _compute_targets(
    rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Ejemplo n.º 4
0
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    # notice:
    # the shape of overlap is N,K. And N is the number of proposal(include pred and gt), K is the number of gt
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]
    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)
    return labels, rois, bbox_targets, bbox_inside_weights
Ejemplo n.º 5
0
    def create_roidb_from_box_list(self, box_list,
                                   gt_roidb):  # 通过 gt_roidb产生 roidb
        assert len(box_list) == self.num_images, \
          'Number of boxes must match number of ground-truth images'   #每张图片对应一个 box_list 一个 box_list 有好几个 box 多物体识别
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]  #每个box_list的个数
            overlaps = np.zeros(
                (num_boxes, self.num_classes),
                dtype=np.float32)  #计算每个 box 与 每个ground_truth 的重叠面积

            if gt_roidb is not None and gt_roidb[i][
                    'boxes'].size > 0:  #groudth 存在 且 box 有尺寸
                gt_boxes = gt_roidb[i]['boxes']  #将 赋值给roidb
                gt_classes = gt_roidb[i]['gt_classes']  # 类别赋值
                gt_overlaps = bbox_overlaps(
                    boxes.astype(
                        np.float
                    ),  #roidb 是在 gt_roidb  基础上 与初始化的 anchor 形成的 overlaps
                    gt_boxes.astype(np.float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes':
                boxes,
                'gt_classes':
                np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps':
                overlaps,
                'flipped':
                False,
                'seg_areas':
                np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
Ejemplo n.º 6
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.num_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                            gt_boxes.astype(np.float))
                argmaxes = gt_overlaps.argmax(
                    axis=1)  #返回的是计算每个anchor和哪个GTobject的IOU最大,这个是相对GT的索引
                maxes = gt_overlaps.max(
                    axis=1)  #返回的是计算每个anchor和GTobject的IOU最大值
                I = np.where(maxes > 0)[0]  #返回max>0时,对应的索引,这个索引也是相对anchor的索引
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[
                    I]  #[anchorid,Classid[GTid]]=maxiOU
            #对于那些零元素数目远远多于非零元素数目,并且非零元素的分布没有规律的矩阵称为稀疏矩阵(sparse)。仅存储非零元素可使矩阵操作效率更高。也就是稀疏矩阵的计算速度更快
            #csr_matrix,创建稀疏矩阵
            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes':
                boxes,
                'gt_classes':
                np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps':
                overlaps,
                'flipped':
                False,
                'seg_areas':
                np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
Ejemplo n.º 7
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
  """Same as the anchor target layer in original Fast/er RCNN """
  # 每个像素点对应anchors 个数
  A = num_anchors

  # 总共anchors个数
  total_anchors = all_anchors.shape[0]

  # 总共像素点个数
  K = total_anchors / num_anchors

  # allow boxes to sit over the edge by a small amount
  _allowed_border = 0

  # map of shape (..., H, W)
  # feature map的高和宽
  height, width = rpn_cls_score.shape[1:3]

  # only keep anchors inside the image
  # 获取所有在图片尺寸内的anchors的indexes
  inds_inside = np.where(
    (all_anchors[:, 0] >= -_allowed_border) &
    (all_anchors[:, 1] >= -_allowed_border) &
    (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
    (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
  )[0]

  # keep only inside anchors
  # 只保留在图片内的anchors
  anchors = all_anchors[inds_inside, :]

  """
  label: 1 is positive, 0 is negative, -1 is dont care
  labels用来保存anchors的标签,1代表是正标签,即该anchor是正样本,即存在ground truth 和该anchor的IoU满足rpn正样本的要求;0代表负样本
  ;-1代表不关心该样本。正样本指当做前景物体的boxes,负样本指代表背景的boxes。
  关于rpn的正负样本的划分,如下描述:
  考察训练集中的每张图像(含有人工标定的ground true box)的所有anchors。
  a. 对每个标定的ground true box区域,与其重叠比例最大的anchor记为 正样本 (保证每个ground true 至少对应一个正样本anchor)
  b. 对a)剩余的anchor,如果其与某个标定区域重叠比例大于0.7,记为正样本(每个ground true box可能会对应多个正样本anchor。但每个正样本
  anchor 只可能对应一个ground true box);如果其与任意一个标定的重叠比例都小于0.3,记为负样本。
  c. 对a),b)剩余的anchor,弃去不用。
  d. 跨越图像边界的anchor弃去不用
  """
  labels = np.empty((len(inds_inside),), dtype=np.float32)
  labels.fill(-1)

  """
  overlaps between the anchors and the gt boxes
  overlaps (ex, gt)
  计算anchors boxes和groud_truth boxes的IoU,overlaps为二维,每行为一个anchors与所有gt boxes之间IoU大小。
  假设overlaps值如下:
  array([[0.48557798, 0.12723372, 0.22120988, 0.93015104],
         [0.41377397, 0.91378704, 0.10855037, 0.19918476],
         [0.86859926, 0.21069385, 0.48224216, 0.63019202],
         [0.38623382, 0.62772807, 0.91166306, 0.91971408],
         [0.25904543, 0.31076955, 0.5593479 , 0.41681275],
         [0.58562328, 0.18718799, 0.28859296, 0.57199318],
         [0.35817963, 0.94432939, 0.80717617, 0.84104114],
         [0.17751499, 0.42397581, 0.40260994, 0.10210093]])
  shape为(8, 4),即一副图片总共有8个anchors boxes和4个ground truth boxes。
  """
  overlaps = bbox_overlaps(
    np.ascontiguousarray(anchors, dtype=np.float),
    np.ascontiguousarray(gt_boxes, dtype=np.float))

  """
  找到每个anchors boxes IoU最大的gt boxes
  argmax_overlaps为array([3, 1, 0, 3, 2, 0, 1, 1])
  """
  argmax_overlaps = overlaps.argmax(axis=1)

  """
  每行最大的IoU组成max_overlaps数组
  max_overlaps为:
  array([0.93015104, 0.91378704, 0.86859926, 0.91971408, 0.5593479 , 0.58562328, 0.94432939, 0.42397581])
  """
  max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

  """
  找到每个gt boxes IoU最大的anchors boxes
  gt_argmax_overlaps为:
  array([2, 6, 3, 0])
  """
  gt_argmax_overlaps = overlaps.argmax(axis=0)

  """
  每列最大的IoU组成gt_max_overlaps数组
  gt_max_overlaps为:
  array([0.86859926, 0.94432939, 0.91166306, 0.93015104])
  """
  gt_max_overlaps = overlaps[gt_argmax_overlaps,
                             np.arange(overlaps.shape[1])]

  """
  根据上述正样本划分的条件a,所以每个ground truth box至少得到一个anchor,gt_argmax_overlaps就是行号,就是对应的anchor,为:
  array([0, 2, 3, 6])。
  """
  gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

  if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    """
    assign bg labels first so that positive labels can clobber them
    first set the negatives
    如果某个anchor最大IoU小于negative阈值,则先设置为负样本。
    """
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  """
  fg label: for each gt, anchor with highest overlap
  满足条件a的anchors的labels设置为1。
  """
  labels[gt_argmax_overlaps] = 1

  """
  fg label: above threshold IOU
  根据上述正样本划分的条件b,剩余的anchors中,如果存在gt boxes与它的IoU大于阈值,则是正样本。
  """
  labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

  if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels last so that negative labels can clobber positives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  """
  subsample positive labels if we have too many
  如果正样本个数超过限定值则随机选择特定个。
  """
  num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
  fg_inds = np.where(labels == 1)[0]
  if len(fg_inds) > num_fg:
    disable_inds = npr.choice(
      fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    labels[disable_inds] = -1

  """
  subsample negative labels if we have too many
  如果负样本个数超过限定值,则随机选择特定个。
  """
  num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
  bg_inds = np.where(labels == 0)[0]
  if len(bg_inds) > num_bg:
    disable_inds = npr.choice(
      bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    labels[disable_inds] = -1

  bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
  """
  bbox_targets是anchors boxes和ground truth的偏移量,偏移量见lib.model.bbox_transorm.bbox_transform_inv_tf函数解释。
  该偏移量就是rpn的Bounding-box regression学习的目标。
  """
  bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

  bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  # only the positive ones have regression targets
  bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

  bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
  else:
    assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
            (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
    positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                        np.sum(labels == 1))
    negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                        np.sum(labels == 0))
  bbox_outside_weights[labels == 1, :] = positive_weights
  bbox_outside_weights[labels == 0, :] = negative_weights

  # map up to original set of anchors
  """
  因为前边把不在图片尺寸内的anchors给删除了,这里再把labels扩充到原始anchors的个数,用-1来扩充。
  后边bbox_targets、bbox_inside_weights、bbox_outside_weights同理。
  """
  labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
  bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
  bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
  bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

  """
  假设labels为:
  array([0, 1, 0, 1, 0, 1, 0, 1])
  假设feature map有两行两列,每个像素点有两个anchors。
  经过转置和reshape,rpn_labels为:
  array([[[[0, 0],    ---feature map第一行两个像素点对应的第一个anchor的label
           [0, 0],    ---feature map第二行两个像素点对应的第一个anchor的label
           [1, 1],    ---feature map第一行两个像素点对应的第二个anchor的label
           [1, 1]]]]) ---feature map第二行两个像素点对应的第二个anchor的label
  shape为(1, 4, 2, 2)
  """
  labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
  labels = labels.reshape((1, 1, A * height, width))
  rpn_labels = labels

  # bbox_targets
  bbox_targets = bbox_targets \
    .reshape((1, height, width, A * 4))

  rpn_bbox_targets = bbox_targets
  # bbox_inside_weights
  bbox_inside_weights = bbox_inside_weights \
    .reshape((1, height, width, A * 4))

  rpn_bbox_inside_weights = bbox_inside_weights

  # bbox_outside_weights
  bbox_outside_weights = bbox_outside_weights \
    .reshape((1, height, width, A * 4))

  rpn_bbox_outside_weights = bbox_outside_weights
  return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Ejemplo n.º 8
0
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training. This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and
    each ground-truth box. The class with maximum overlap is also
    recorded.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print '{} gt roidb prepared loaded from {}'.format(
            imdb.name, cache_file)
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float),
                                     boxes_rescaled.astype(np.float))
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            fg_inds = []
            for k in xrange(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k)
                             & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # scale mapping for RoI pooling
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # contruct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb prepared to {}'.format(cache_file)
Ejemplo n.º 9
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    # 2 inds_inside = 所有的anchor中x1,y1,x2,y2没有超过图像边界的。
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # labels 字段的长度就是合法的anchor的个数
    # 先用-1填充labels
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes ,
    # 3 根据预设阈值和overlap重叠率,打上前背景标签1|0
    # bbox_overlaps ()计算anchors与gt_boxes之间的重合度IOU,大于0.7标记为前景图,小于0.3标记为背景图;
    # 返回类型(n,k),即第n个anchors与第K个gt_boxes的IOU重合度值
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))

    # 4 对于每个anchor,找到与gt_box坐标的IOU的最大值,即找到每个anchors最大重叠率的gt_boxes。
    argmax_overlaps = overlaps.argmax(axis=1) # ? #
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] # ? # anchors与gt_boxes最大IoU

    gt_argmax_overlaps = overlaps.argmax(axis=0) # ? #
    gt_max_overlaps = overlaps[gt_argmax_overlaps,np.arange(overlaps.shape[1])] # ? #

    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]#再次对于每个gt_box,找到对应的最大overlap的anchor。shape[len(gt_boxes),]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives , 在这里将anchors与gt_boxes最大IoU仍然小于阈值(0.3)的某些anchor置0
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU , 在这里将anchors与gt_boxes最大IoU大于阈值(0.7)的某些anchor置1
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # 5 随机抛弃一些前景anchor和背景anchors. 二次采样
    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1 #如果事实存在的前景anchor大于了所需值,就随机抛弃一些前景anchor

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # 6 使用bbox_transform函数,计算每个anchor与最大的overlap的gt_boxes的框偏移量,
    # 作为位移量的标签值(tx,ty,th,tw)用于后续框回归
    # bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)  # 对每个在原图内部的anchor,用全0初始化坐标变换值
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])


    # bbox_inside_weights和bbox_outside_weights,这两个数组在训练anchor边框修正时有重大作用

    #7 在进行边框修正loss的计算时,只有前景anchor会起作用,可以看到这是bbox_inside_weights和bbox_outside_weights在实现。
    # 非前景和背景anchor对应的bbox_inside_weights和bbox_outside_weights都为0  @ https://blog.csdn.net/u012426298/article/details/81517609
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)


    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    # 8.统一所有的标签,并转化标签labels的格式后,返回
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) # ? #
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # ? #

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                 '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [[0 ** 2, 1e5 ** 2],  # all
                       [0 ** 2, 32 ** 2],  # small
                       [32 ** 2, 96 ** 2],  # medium
                       [96 ** 2, 1e5 ** 2],  # large
                       [96 ** 2, 128 ** 2],  # 96-128
                       [128 ** 2, 256 ** 2],  # 128-256
                       [256 ** 2, 512 ** 2],  # 256-512
                       [512 ** 2, 1e5 ** 2],  # 512-inf
                       ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            overlaps = bbox_overlaps(boxes.astype(np.float),
                                     gt_boxes.astype(np.float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}
Ejemplo n.º 11
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    # 生成前景和背景roi
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    # 选取前景roi,与IOU最大的GT的IOU超过0.5,被认为前景
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    # 选取背景roi,与IOU最大的GT的IOU低于0.5,高于0.1,标记为背景
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    # 保留的roi索引
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # 取roi标签
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    # 将被标记为背景的roi的标签置为0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # bbox的rcnn回归目标值
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # bbox_inside_weights:这个是干啥的?每个box的损失权重?
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Ejemplo n.º 12
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Faster RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    # 是否允许anchor box越过图片边界
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    # 只保留在图内的anchor
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    # 仅保留界内的anchor box
    anchors = all_anchors[inds_inside, :]

    # 为anchor box设置标签
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # 计算anchor和gt的iou
    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # 计算出所有anchor和gt_boxes的IOU
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    print('anchors iou with gt-boxes shape:', overlaps)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        # 先设置负例bbox,与任意gt的iou小于0.3均为负例(背景)
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    # 1、是任意gt的最大iou 的bbox,均设为正例(前景)
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    # 2、与任意gt的iou大于0.7,均设为正例(前景)
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    # 我们最终给出的roi数量是256,其中正例不允许超过50%,如果超出,随机将一部分设为不关心
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    # 负例数量 = 256-正例数量,如果超出,随机去掉多余的
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # TODO 这是为bbox设置回归目标值! 有空看下目标值到底是不是偏移量
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    # 对labels reshape
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    # 对回归值 reshape
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):  #rpn_cls score 是0 或者1
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  #获取map大小

    # only keep anchors inside the image   #保证 xmin>0 ymin>0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width bolb
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)  #labels 初始化为-1
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes                   #此刻终于找到 overlaps 的 函数 此刻 两这结合起来碰撞出火花
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(
        axis=0)  #那一列大值 一般是 与gt 所围成的面积的最大值 的索引标签给1
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[
            max_overlaps < cfg.TRAIN.
            RPN_NEGATIVE_OVERLAP] = 0  #每个anchor的最大max_ovlaps<0.3的话 肯定labels 为背景 设为0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  #前景全部设为1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.
           RPN_POSITIVE_OVERLAP] = 1  #IOU >= thresh: positive example IOU>0.7

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many  cfg.TRAIN.RPN_BATCHSIZE=256
    num_fg = int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE
    )  # Max number of foreground examples __C.TRAIN.RPN_FG_FRACTION = 0.5 Total number of examples

    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Ejemplo n.º 14
0
    def _load_rpn_roidb(self, gt_roidb, model):

        box_list = []
        for ix, index in enumerate(self.image_index):
            filename = os.path.join(self._imagenet3d_path, 'region_proposals',
                                    model, index + '.txt')
            assert os.path.exists(filename), \
                '{} data not found at: {}'.format(model, filename)
            raw_data = np.loadtxt(filename, dtype=float)
            if len(raw_data.shape) == 1:
                if raw_data.size == 0:
                    raw_data = raw_data.reshape((0, 5))
                else:
                    raw_data = raw_data.reshape((1, 5))

            if model == 'selective_search' or model == 'mcg':
                x1 = raw_data[:, 1].copy()
                y1 = raw_data[:, 0].copy()
                x2 = raw_data[:, 3].copy()
                y2 = raw_data[:, 2].copy()
            elif model == 'edge_boxes':
                x1 = raw_data[:, 0].copy()
                y1 = raw_data[:, 1].copy()
                x2 = raw_data[:, 2].copy() + raw_data[:, 0].copy()
                y2 = raw_data[:, 3].copy() + raw_data[:, 1].copy()
            elif model == 'rpn_caffenet' or model == 'rpn_vgg16':
                x1 = raw_data[:, 0].copy()
                y1 = raw_data[:, 1].copy()
                x2 = raw_data[:, 2].copy()
                y2 = raw_data[:, 3].copy()
            else:
                assert 1, 'region proposal not supported: {}'.format(model)

            inds = np.where((x2 > x1) & (y2 > y1))[0]
            raw_data[:, 0] = x1
            raw_data[:, 1] = y1
            raw_data[:, 2] = x2
            raw_data[:, 3] = y2
            raw_data = raw_data[inds, :4]

            self._num_boxes_proposal += raw_data.shape[0]
            box_list.append(raw_data)
            print 'load {}: {}'.format(model, index)

            if gt_roidb is not None:
                # compute overlaps between region proposals and gt boxes
                boxes = gt_roidb[ix]['boxes'].copy()
                gt_classes = gt_roidb[ix]['gt_classes'].copy()
                # compute overlap
                overlaps = bbox_overlaps(raw_data.astype(np.float),
                                         boxes.astype(np.float))
                # check how many gt boxes are covered by anchors
                if raw_data.shape[0] != 0:
                    max_overlaps = overlaps.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return self.create_roidb_from_box_list(box_list, gt_roidb)
Ejemplo n.º 15
0
    def _load_imagenet3d_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the imagenet3d format.
        """

        if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2':
            lines = []
        else:
            filename = os.path.join(self._imagenet3d_path, 'Labels',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        viewpoints = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        viewpoints_flipped = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            assert len(words) == 5 or len(
                words) == 8, 'Wrong label format: {}'.format(index)
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[1:5]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            if len(words) == 8:
                viewpoints[ix, :] = [float(n) for n in words[5:8]]
                # flip the viewpoint
                viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
                viewpoints_flipped[ix, 1] = viewpoints[ix, 1]  # elevation
                viewpoints_flipped[ix,
                                   2] = -viewpoints[ix, 2]  # in-plane rotation
            else:
                viewpoints[ix, :] = np.inf
                viewpoints_flipped[ix, :] = np.inf

        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        viewindexes_azimuth = np.zeros((num_objs, self.num_classes),
                                       dtype=np.float32)
        viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes),
                                               dtype=np.float32)
        viewindexes_elevation = np.zeros((num_objs, self.num_classes),
                                         dtype=np.float32)
        viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes),
                                                 dtype=np.float32)
        viewindexes_rotation = np.zeros((num_objs, self.num_classes),
                                        dtype=np.float32)
        viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes),
                                                dtype=np.float32)

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
        viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
        viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
            viewindexes_azimuth_flipped)
        viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
        viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
            viewindexes_elevation_flipped)
        viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
        viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
            viewindexes_rotation_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = cfg.TRAIN.RPN_ASPECTS
                scales = cfg.TRAIN.RPN_SCALES
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_viewpoints': viewpoints,
            'gt_viewpoints_flipped': viewpoints_flipped,
            'gt_viewindexes_azimuth': viewindexes_azimuth,
            'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
            'gt_viewindexes_elevation': viewindexes_elevation,
            'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
            'gt_viewindexes_rotation': viewindexes_rotation,
            'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Ejemplo n.º 16
0
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI format.
        """

        if self._image_set == 'test':
            lines = []
        else:
            filename = os.path.join(self._data_path, 'training', 'label_2',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    line = line.replace('Van', 'Car')
                    words = line.split()
                    cls = words[0]
                    truncation = float(words[1])
                    occlusion = int(words[2])
                    height = float(words[7]) - float(words[5])
                    if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25:
                        lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[4:8]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Ejemplo n.º 17
0
def _sample_rois(all_rois, all_scores, gt_boxes, gt_phrases, fg_rois_per_image,
                 rois_per_image):
    """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]
    phrases = gt_phrases[gt_assignment]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if cfg.SAMPLE_NUM_FIXED_REGIONS:
        if fg_inds.size > 0 and bg_inds.size > 0:
            fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
            fg_inds = npr.choice(fg_inds,
                                 size=int(fg_rois_per_image),
                                 replace=False)
            bg_rois_per_image = rois_per_image - fg_rois_per_image
            to_replace = bg_inds.size < bg_rois_per_image
            bg_inds = npr.choice(bg_inds,
                                 size=int(bg_rois_per_image),
                                 replace=to_replace)
        elif fg_inds.size > 0:
            to_replace = fg_inds.size < rois_per_image
            fg_inds = npr.choice(fg_inds,
                                 size=int(rois_per_image),
                                 replace=to_replace)
            fg_rois_per_image = rois_per_image
        elif bg_inds.size > 0:
            to_replace = bg_inds.size < rois_per_image
            bg_inds = npr.choice(bg_inds,
                                 size=int(rois_per_image),
                                 replace=to_replace)
            fg_rois_per_image = 0
        else:
            import pdb
            pdb.set_trace()
    else:
        # foreground RoIs
        fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
        # Sample foreground regions without replacement
        if fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)

        # Compute number of background RoIs to take from this image (guarding
        # against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
        # Sample background regions without replacement
        if bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds,
                                 size=bg_rois_per_this_image,
                                 replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    phrases = phrases[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    phrases[int(fg_rois_per_image):, :] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    if cfg.DEBUG_ALL:
        target_boxes = compute_rois_offset(rois[:, 1:5], bbox_target_data[:,
                                                                          1:5])
        match_boxes = gt_boxes[gt_assignment[keep_inds], :4]
        print('boxes consistency check')
        print(target_boxes[:2, :])
        print(match_boxes[:2, :])
        assert np.linalg.norm(target_boxes - match_boxes) < 0.01

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, phrases
Ejemplo n.º 18
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # print("gt_boxes: {}".format(gt_boxes))
    # print("all_rois: {}".format(all_rois))
    # print("all_scores: {}".format(all_scores))
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]
    # print("fg_inds {} bg_inds {} before modifications".format(fg_inds, bg_inds))
    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        #import pdb
        #pdb.set_trace()
        raise Exception() # Raise exception instead of debugging.

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # print("rois: {}".format(rois))
    # print("gt_boxes: {}".format(gt_boxes[gt_assignment[keep_inds], :4]))
    # print("labels {}".format(labels))
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    # print("bbox_target_data: {}".format(bbox_target_data))
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]  #在此处计算标签
    """
    tf.app.flags.DEFINE_float('roi_fg_threshold', 0.5, "Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)")
    tf.app.flags.DEFINE_float('roi_bg_threshold_high', 0.5, "Overlap threshold for a ROI to be considered background (class = 0 if overlap in [LO, HI))")
    tf.app.flags.DEFINE_float('roi_bg_threshold_low', 0.1, "Overlap threshold for a ROI to be considered background (class = 0 if overlap in [LO, HI))")
    """
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high)
                       & (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # 当为true时,则表明正样本减少了,负样本需要增多
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    # 只有前景时
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    # 只有背景时
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        raise Exception()

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0  # 这里的label是指真实类别标签 0-20
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    #  第一列是标签,后四列是偏移量
    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Ejemplo n.º 20
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Ejemplo n.º 21
0
def anchor_target_layer(rpn_cls_prob, gt_boxes, gt_points, im_info, _feat_stride, all_anchors, num_anchors, target_name):
    # def anchor_target_layer(anchor_hw, rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors / num_anchors
    hard_mining = cfg.TRAIN.HARD_POSITIVE_MINING

    # allow boxes to sit over the edge by a small amount
    # _allowed_border = 0
    # follow the SSH setting
    if target_name == "M3":
        _allowed_border = 512
    else:
        _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_prob.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # only keep anchors inside anchors
    # keep away the problem of ‘ValueError: attempt to get argmax of an empty sequence’ during training
    if inds_inside.shape[0] == 0:
        # If no anchors inside use whatever anchors we have
        inds_inside = np.arange(0, total_anchors)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))

    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    ################################### Subsample positive labels ##################################
    # subsample positive labels if we have too many
    # num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    # fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1

    ##################### Add OHEM for subsample positive labels(Online Hard Examples Mining) ##########
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if hard_mining:
            ohem_scores = rpn_cls_prob[:, :, :, num_anchors:]
            ohem_scores = ohem_scores.reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            pos_ohem_scores = 1 - ohem_scores[fg_inds]
            order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
            labels[fg_inds] = -1
            labels[ohem_sampled_fgs] = 1
        else:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
    ########################################## End ##################################################


    ##########################################Subsample negative labels #############################
    # subsample negative labels if we have too many
    # num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    # bg_inds = np.where(labels == 0)[0]
    # if len(bg_inds) > num_bg:
    #     disable_inds = npr.choice(
    #         bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    #     labels[disable_inds] = -1

    ################# Add OHEM for subsample negative labels(Online Hard Examples Mining) ############
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if not hard_mining:
            # randomly sub-sampling negatives
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
        else:
            # sort ohem scores
            ohem_scores = rpn_cls_prob[:, :, :, num_anchors:]
            ohem_scores = ohem_scores.reshape((-1, 1))
            ohem_scores = ohem_scores[inds_inside]
            neg_ohem_scores = ohem_scores[bg_inds]
            order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
            labels[bg_inds] = -1
            labels[ohem_sampled_bgs] = 0
    ########################################## End ##############################################

    # Compute boxes regression targets
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_bboxes_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Compute kpoints offset targets
    kpoints_targets = np.zeros((len(inds_inside), 10), dtype=np.float32)
    kpoints_targets = _compute_kpoints_targets(anchors, gt_points[argmax_overlaps, :10])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    kpoints_inside_weights = np.zeros((len(inds_inside), 10), dtype=np.float32)
    # only the positive ones have regression targets
    kpoints_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_KPOINTS_POSITIVE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    kpoints_outside_weights = np.zeros((len(inds_inside), 10), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        kpoints_positive_weights = np.ones((1, 10)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
        kpoints_positive_weights = (cfg.TRAIN.RPN_KPOINTS_POSITIVE_WEIGHTS /
                            np.sum(labels == 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    kpoints_outside_weights[labels == 1, :] = kpoints_positive_weights
    if gt_points.size > 0:
        gt_points_flag = gt_points[argmax_overlaps, 10]
        gt_points_flag = np.array(gt_points_flag, np.int32)
        kpoints_inside_weights[gt_points_flag == 0, :] = np.array(cfg.TRAIN.RPN_KPOINTS_WEIGHTS_NON)
        kpoints_outside_weights[gt_points_flag == 0, :] = np.array(cfg.TRAIN.RPN_KPOINTS_WEIGHTS_NON)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    kpoints_targets = _unmap(kpoints_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
    kpoints_inside_weights = _unmap(kpoints_inside_weights, total_anchors, inds_inside, fill=0)
    kpoints_outside_weights = _unmap(kpoints_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    # kpoints_targets
    kpoints_targets = kpoints_targets.reshape((1, height, width, A * 10))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights

    # kpoints_inside_weights
    kpoints_inside_weights = kpoints_inside_weights.reshape((1, height, width, A * 10))
    rpn_kpoints_inside_weights = kpoints_inside_weights

    # kpoints_outside_weights
    kpoints_outside_weights = kpoints_outside_weights.reshape((1, height, width, A * 10))
    rpn_kpoints_outside_weights = kpoints_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, kpoints_targets, \
           rpn_kpoints_inside_weights, rpn_kpoints_outside_weights
Ejemplo n.º 22
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """

  """
  overlaps: (rois x gt_boxes)
  计算proposal boxes与ground truth boxes的IoU
  """
  overlaps = bbox_overlaps(
    np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
  gt_assignment = overlaps.argmax(axis=1)
  max_overlaps = overlaps.max(axis=1)

  # labels保存ground truth boxes的类型标签
  labels = gt_boxes[gt_assignment, 4]

  """
  Select foreground RoIs as those with >= FG_THRESH overlap
  根据IoU和前景阈值,计算所有前景的proposal boxes,也就是包含物体的proposal boxes。
  """
  fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]

  """
  Guard against the case when an image has fewer than fg_rois_per_image
  Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  根据IoU和背景阈值,计算所有背景的proposal boxes。
  """
  bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

  """
  Small modification to the original version where we ensure a fixed number of regions are sampled
  选取对应数据量的前景和背景proposal boxes。
  """
  if fg_inds.size > 0 and bg_inds.size > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
    fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.size < bg_rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
  elif fg_inds.size > 0:
    to_replace = fg_inds.size < rois_per_image
    fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.size > 0:
    to_replace = bg_inds.size < rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  """
  The indices that we're selecting (both fg and bg)
  连接一下前景和背景proposal boxes的indexes
  """
  keep_inds = np.append(fg_inds, bg_inds)

  # Select sampled values from various arrays:
  labels = labels[keep_inds]

  """
  Clamp labels for the background RoIs to 0
  将背景proposal boxes的label设置为0
  """
  labels[int(fg_rois_per_image):] = 0

  """
  选取一下留下来的rois和roi_scores。
  """
  rois = all_rois[keep_inds]
  roi_scores = all_scores[keep_inds]

  """
  计算一下留下来的proposal boxes与对应ground truth boxes之间的偏移量。
  bbox_target_data的shape为:
  (len(keep_inds), 5),其中第2维中第一个元素为label,后四个元素为偏移量。
  """
  bbox_target_data = _compute_targets(
    rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

  """
  bbox_targets的shape为N × 4k,N代表proposal boxes的个数,4k则是指对每一个类别都有四个值对应偏移量。
  比如如果一个proposal box的类别是1,则bbox_targets对应该proposal box的行第4元素~第7个元素代表偏移量,其他值均为0。特殊的,对于背景,
  对应的偏移量为0。
  """
  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Ejemplo n.º 23
0
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

        # print 'Loading: {}'.format(filename)
        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            name = str(get_data_from_tag(obj, "name")).lower().strip()
            if name in self._classes:
                cls = self._class_to_ind[name]
            else:
                cls = 0
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Ejemplo n.º 24
0
def _sample_rois(all_rois, gt_boxes, dontcare_areas, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    gt_num = gt_boxes.shape[0]
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    if DEBUG:
        print 'max_overlaps', overlaps[:-gt_num, :].max(axis=0)

    # preclude dontcare areas
    ignore_inds = np.empty(shape=(0), dtype=int)
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x R
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float)  # R x 4
        )
        intersecs_sum = intersecs.sum(axis=0)  # R x 1
        ignore_inds = np.append(
            ignore_inds,
            np.where(
                intersecs_sum > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI)[0])

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    fg_inds = np.setdiff1d(fg_inds, ignore_inds)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    bg_inds = np.setdiff1d(bg_inds, ignore_inds)

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # fg_rois_per_this_image = int(min(bg_inds.size, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # bg_rois_per_this_image = fg_rois_per_this_image
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    # bbox_target_data = np.zeros((rois.shape[0], 5))
    # _compute_targets(all_rois[fg_inds, 1:5], gt_boxes[gt_assignment[fg_inds], :4], labels[:len(fg_inds)])
    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
Ejemplo n.º 25
0
    def _load_pascal3d_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the pascal subcategory exemplar format.
        """

        if self._image_set == 'val':
            return self._load_pascal_annotation(index)

        filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                                index + '.txt')
        assert os.path.exists(filename), \
                'Path does not exist: {}'.format(filename)

        # the annotation file contains flipped objects
        lines = []
        lines_flipped = []
        with open(filename) as f:
            for line in f:
                words = line.split()
                subcls = int(words[1])
                is_flip = int(words[2])
                if subcls != -1:
                    if is_flip == 0:
                        lines.append(line)
                    else:
                        lines_flipped.append(line)

        num_objs = len(lines)

        # store information of flipped objects
        assert (num_objs == len(lines_flipped)
                ), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

        for ix, line in enumerate(lines_flipped):
            words = line.split()
            subcls = int(words[1])
            gt_subclasses_flipped[ix] = subcls

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            # Make pixel indexes 0-based
            boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Ejemplo n.º 26
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.  gt_boxes:[6, 8],     all_rois[2000, 8]
    """
    print(
        "------------------------------------------------------------------------------------------------------------\r\n"
    )
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5],
                             dtype=np.float),  # all_rois[1:5]是box  [5:] 是dis
        np.ascontiguousarray(gt_boxes[:, :4],
                             dtype=np.float))  # gt_boxes[:4]是box   [5:] 是dis
    # print(overlaps.shape) (2000, 3)
    gt_assignment = overlaps.argmax(
        axis=1)  # 1表示行最大值地方  gt_assignment.shape = 2000
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    print(max_overlaps, "\r\n", gt_boxes[gt_assignment, 5:])
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high)
                       & (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # fg_inds = []
        # bg_inds = cfg.bg_inds
        # # print("fg_rois_per_image", type(fg_rois_per_image), fg_rois_per_image)
        # fg_rois_per_image = 0
        # pass
        # # print(fg_inds.size, bg_inds.size)
        import pdb
        pdb.set_trace()  # pdb调试程序,程序停止。

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # print("check", type(labels), labels, "\r\n python", keep_inds)
    labels = labels[np.array(keep_inds, dtype=np.int)]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]  # (256, 8)
    roi_scores = all_scores[keep_inds]

    # bbox_target_data.shape (256, 5)
    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    gt_dis_label = gt_boxes[gt_assignment[keep_inds],
                            5:]  # (256, 3)这个是经过处理后的所有dis 标准label

    # print(gt_dis_label)
    # bbox_targets.shape (256, 40)  dis_targets (256, 30)
    bbox_targets, dis_targets, bbox_inside_weights, dis_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, gt_dis_label, num_classes)
    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, dis_inside_weights, dis_targets