def bbox_vote(dets_NMS, dets_all, thresh=0.8):
    """Refine each NMS survivor by score-weighted voting over overlapping boxes.

    For every row of ``dets_NMS`` the rows of ``dets_all`` whose overlap with
    it (as reported by ``bbox_overlaps``) is at least ``thresh`` are gathered,
    and columns 0-3 of those rows are averaged using column 4 as the weight.

    Args:
        dets_NMS: 2-D array of detections kept after NMS; columns 0-3 are the
            box and column 4 is the score.
        dets_all: 2-D array of candidate detections with the same column
            layout.
        thresh: Minimum overlap for a candidate to take part in the vote.

    Returns:
        An array shaped like ``dets_NMS``: columns 0-3 hold the voted box and
        column 4 the survivor's score. Any further columns stay zero.

    Raises:
        AssertionError: If a survivor has no candidate reaching ``thresh``.
    """
    # Score re-weighting settings. The re-weighting branch below only runs
    # when BBOX_VOTE_N_WEIGHTED_SCORE > 1, so with these values it is disabled.
    BBOX_VOTE_N_WEIGHTED_SCORE = 1
    BBOX_VOTE_WEIGHT_EMPTY = 0.5

    dets_voted = np.zeros_like(dets_NMS)

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float64))

    # for each survived box
    for i, det in enumerate(dets_NMS):
        dets_overlapped = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert len(dets_overlapped) > 0

        boxes = dets_overlapped[:, 0:4]
        scores = dets_overlapped[:, 4]
        # Score-weighted mean of the overlapping boxes.
        dets_voted[i, 0:4] = np.dot(scores, boxes) / scores.sum()
        # Keep the original score unless re-weighting is enabled below.
        dets_voted[i, 4] = det[4]

        if BBOX_VOTE_N_WEIGHTED_SCORE > 1:
            n_agreement = BBOX_VOTE_N_WEIGHTED_SCORE
            w_empty = BBOX_VOTE_WEIGHT_EMPTY
            n_detected = len(scores)

            if n_detected >= n_agreement:
                # Average of the n_agreement highest scores.
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                # Fewer voters than required: missing votes count as w_empty.
                new_score = (np.average(scores)
                             * (n_detected * 1.0
                                + (n_agreement - n_detected) * w_empty)
                             / n_agreement)

            # Re-weighting may only lower the score, never raise it.
            dets_voted[i, 4] = min(new_score, dets_voted[i, 4])
    return dets_voted
Beispiel #2
0
def image_eval(pred, gt, ignore, iou_thresh):
    """Match the detections of a single image against its ground truth.

    Args:
        pred: Nx5 array of detections; columns 0-3 are the box, and columns
            2 and 3 are added to columns 0 and 1 before matching.
        gt: Mx4 array of ground-truth boxes, converted the same way.
        ignore: Per-ground-truth flags indexed by ground-truth row; a value
            of 0 marks the box as one to be ignored.
        iou_thresh: Minimum overlap for a detection to count as a match.

    Returns:
        ``(pred_recall, proposal_list)``. ``pred_recall[h]`` is the number of
        ground-truth boxes marked as recalled after the first ``h + 1``
        detections; ``proposal_list[h]`` is -1 for detections matched to an
        ignored box and 1 otherwise.
    """
    det = pred.copy()
    truth = gt.copy()
    num_det = det.shape[0]

    # Turn the extent columns into far-corner coordinates.
    det[:, 2] += det[:, 0]
    det[:, 3] += det[:, 1]
    truth[:, 2] += truth[:, 0]
    truth[:, 3] += truth[:, 1]

    pred_recall = np.zeros(num_det)
    proposal_list = np.ones(num_det)
    # Per ground truth: 0 = unmatched, 1 = recalled, -1 = ignored and hit.
    matched = np.zeros(truth.shape[0])

    overlaps = bbox_overlaps(det[:, :4], truth)

    for h in range(num_det):
        row = overlaps[h]
        if row.max() >= iou_thresh:
            best = row.argmax()
            if ignore[best] == 0:
                matched[best] = -1
                proposal_list[h] = -1
            elif matched[best] == 0:
                matched[best] = 1

        # Running count of recalled ground-truth boxes.
        pred_recall[h] = (matched == 1).sum()
    return pred_recall, proposal_list
Beispiel #3
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from externally supplied boxes.

        Args:
            box_list: Sequence with one 2-D box array per image; its length
                must equal ``self.num_images``.
            gt_roidb: Optional sequence of ground-truth entries providing
                ``'boxes'`` and ``'gt_classes'`` per image, or ``None``.

        Returns:
            A list of dicts with keys ``'boxes'``, ``'gt_classes'`` (all
            zeros), ``'gt_overlaps'`` (sparse, ``num_boxes x num_classes``),
            ``'flipped'`` (``False``) and ``'seg_areas'`` (all zeros).
        """
        assert len(box_list) == self.num_images, \
          'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float was removed in NumPy 1.24; float64 is what it meant.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Record the best overlap under the class of the best-matching
                # ground-truth box, only for boxes that overlap something.
                hit = np.where(maxes > 0)[0]
                overlaps[hit, gt_classes[argmaxes[hit]]] = maxes[hit]

            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps': scipy.sparse.csr_matrix(overlaps),
                'flipped': False,
                'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
Beispiel #4
0
    def __call__(self, image, boxes=None, labels=None):
        crop_height = self.crop_height
        crop_width = self.crop_width
        center_crop = self.center_crop
        image_height, image_width = image.shape[0], image.shape[1]
        max_offset_height = image_height - self.crop_height + 1
        max_offset_width = image_width - self.crop_width + 1

        if center_crop == True:
            offset_height = (image_height - self.crop_height) / 2
            offset_width = (image_width - self.crop_width) / 2
        else:
            offset_height = np.random.randint(low=0,
                                              high=max_offset_height,
                                              size=(1, ))[0]
            offset_width = np.random.randint(low=0,
                                             high=max_offset_width,
                                             size=(1, ))[0]

        cropped_im = image[offset_height:offset_height + crop_height,
                           offset_width:offset_width + crop_width, :]

        if boxes.shape[0] == 0:
            return cropped_im, boxes, labels

        ori_boxes = boxes.copy()
        boxes[:, 0] = np.maximum(boxes[:, 0], offset_width)
        boxes[:, 1] = np.maximum(boxes[:, 1], offset_height)
        boxes[:, 2] = np.minimum(boxes[:, 2], offset_width + crop_width - 1)
        boxes[:, 3] = np.minimum(boxes[:, 3], offset_height + crop_height - 1)

        tovlp = bbox_overlaps(boxes.astype(np.float64),
                              ori_boxes.astype(np.float64))
        argmax_tovlp = tovlp.argmax(axis=1)
        max_toplp = tovlp[np.arange(tovlp.shape[0]), argmax_tovlp]

        labelRect = ori_boxes.copy()
        labelRect[:, 0] -= offset_width
        labelRect[:, 1] -= offset_height
        labelRect[:, 2] -= offset_width
        labelRect[:, 3] -= offset_height

        labelRect[:, 0] = np.minimum(crop_width - 1,
                                     np.maximum(0, labelRect[:, 0]))
        labelRect[:, 1] = np.minimum(crop_height - 1,
                                     np.maximum(0, labelRect[:, 1]))
        labelRect[:, 2] = np.minimum(crop_width - 1,
                                     np.maximum(0, labelRect[:, 2]))
        labelRect[:, 3] = np.minimum(crop_height - 1,
                                     np.maximum(0, labelRect[:, 3]))

        invalid_idx = np.logical_or(labelRect[:, 2] <= labelRect[:, 0],
                                    labelRect[:, 3] <= labelRect[:, 1])
        invalid_idx = np.logical_or(invalid_idx, max_toplp < 0.2)
        invalid_idx = np.where(invalid_idx == True)
        gt_boxes = np.delete(labelRect, invalid_idx[0], axis=0)
        labels = np.delete(labels, invalid_idx[0], axis=0)

        return cropped_im, gt_boxes, labels
Beispiel #5
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of RoIs; columns 1-4 are the box coordinates.
        all_scores: Per-RoI scores, indexed like ``all_rois``.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the label.
        fg_rois_per_image: Upper bound on the number of foreground RoIs.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Passed through to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)`` for
        the sampled RoIs, foreground first and background after.
    """
    fg_thresh = 0.5
    bg_thresh_hi = 0.5
    bg_thresh_lo = 0.1

    # overlaps: (rois x gt_boxes)
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground RoIs: best overlap of at least fg_thresh.
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    # Background RoIs: best overlap within [bg_thresh_lo, bg_thresh_hi).
    bg_inds = np.where((max_overlaps < bg_thresh_hi) &
                       (max_overlaps >= bg_thresh_lo))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled.
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when there are too few candidates.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Neither set has candidates: fall back to every RoI below the
        # foreground threshold, ignoring the lower background bound.
        bg_inds = np.where(max_overlaps < fg_thresh)[0]
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Beispiel #6
0
def relationship_checker(gt_objects,
                         gt_relationships,
                         bicls,
                         boxes_s,
                         boxes_o,
                         thres=0.99):
    '''
    Count how many ground-truth relationships are recovered by ``bicls``.

    A ground-truth relationship counts as recalled when some candidate pair
    has subject and object overlaps of exactly 1 with the ground-truth pair
    and a ``bicls`` value of at least 0.5.

    :param gt_objects: (gt_num, 5) [x1,y1,x2,y2,cls]
    :param gt_relationships: (gt_num, gt_num)
    :param bicls: prediction of 'have relationship or not' (gt*(gt-1), 1)
    :param boxes_s: (gt*(gt-1), 5) [0,x1,y1,x2,y2]
    :param boxes_o: (gt*(gt-1), 5)
    :param thres: currently unused; candidates are matched on overlap == 1
    :return: (precision_correct, precision_total, recall_correct,
              recall_total)
    '''
    gt_rel_sub_idx, gt_rel_obj_idx = np.where(
        gt_relationships > 0)  # ground truth number
    gt_sub = gt_objects[gt_rel_sub_idx, :5]
    gt_obj = gt_objects[gt_rel_obj_idx, :5]
    gt_rel = gt_relationships[gt_rel_sub_idx, gt_rel_obj_idx]

    recall_total = len(gt_rel)
    precision_total = np.sum(bicls >= 0.5)
    recall_correct = 0

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    sub_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_s[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_sub[:, :4], dtype=np.float64))
    obj_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_o[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_obj[:, :4], dtype=np.float64))

    # range replaces the Python 2-only xrange.
    for gt_id in range(recall_total):
        fg_candidate = np.where(
            np.logical_and(sub_overlaps[:, gt_id] == 1,
                           obj_overlaps[:, gt_id] == 1))[0]

        # One positive candidate is enough to recall this relationship.
        if any(bicls[candidate_id] >= 0.5 for candidate_id in fg_candidate):
            recall_correct += 1

    precision_correct = recall_correct

    return precision_correct, precision_total, recall_correct, recall_total
Beispiel #7
0
def check_recall(rois, gt_objects, top_N, thres=0.5):
    """Count ground-truth boxes covered by the first ``top_N`` RoIs.

    Args:
        rois: Object exposing ``.cpu().data.numpy()`` that yields a 2-D array
            whose columns 1-4 are the box coordinates.
        gt_objects: 2-D array whose columns 0-3 are the ground-truth boxes.
        top_N: Number of leading RoIs to consider.
        thres: Minimum overlap for a ground-truth box to count as covered.

    Returns:
        ``(correct_cnt, total_cnt)``: covered and total ground-truth boxes.
    """
    # gt_objects[:, :4] selects the coordinate columns of every ground-truth
    # row; the previous gt_objects[:4] kept only the first four rows instead.
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(rois.cpu().data.numpy()[:top_N, 1:5],
                             dtype=np.float64),
        np.ascontiguousarray(gt_objects[:, :4], dtype=np.float64))

    # Best overlap achieved for each ground-truth box.
    overlap_gt = np.amax(overlaps, axis=0)
    correct_cnt = np.sum(overlap_gt >= thres)
    total_cnt = overlap_gt.size
    return correct_cnt, total_cnt
Beispiel #8
0
    def _merge_dets(self, detections, tile_ids):
        """Merge detections coming from different tiles.

        ``detections`` is converted to a record array of ``DET_DTYPE``. An
        empty result is returned as-is; otherwise the pairwise overlaps of
        the ``tlbr`` boxes are handed to ``self._merge`` together with
        ``tile_ids`` and ``self.merge_thresh``.
        """
        dets = np.asarray(detections, dtype=DET_DTYPE).view(np.recarray)
        tiles = np.asarray(tile_ids)
        if not len(dets):
            return dets

        # Overlap of every detection with every other detection.
        boxes = dets.tlbr
        pairwise_ious = bbox_overlaps(boxes, boxes)
        merged = self._merge(dets, tiles, pairwise_ious, self.merge_thresh)
        return merged.view(np.recarray)
Beispiel #9
0
    def _iou_cost(self, trk_ids, detections):
        if len(trk_ids) == 0 or len(detections) == 0:
            return np.empty((len(trk_ids), len(detections)))

        # make sure associated pair has the same class label
        trk_labels = np.array([self.tracks[trk_id].label for trk_id in trk_ids])
        trk_bboxes = np.array([self.tracks[trk_id].tlbr for trk_id in trk_ids])
        det_bboxes = detections.tlbr
        ious = bbox_overlaps(trk_bboxes, det_bboxes)
        ious = self._gate_cost(ious, trk_labels, detections.label, self.iou_thresh, True)
        return ious
Beispiel #10
0
def checker(rois, gt_objects, thres=0.7):
    """Count precision and recall hits of ``rois`` against ``gt_objects``.

    Args:
        rois: Object exposing ``.cpu().data.numpy()`` that yields a 2-D array
            whose columns 1-4 are the box coordinates.
        gt_objects: 2-D array whose columns 0-3 are the ground-truth boxes.
        thres: Minimum overlap for a box to count as matched.

    Returns:
        ``(precision_correct, precision_total, recall_correct,
        recall_total)``: matched and total RoIs, then matched and total
        ground-truth boxes.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(rois.cpu().data.numpy()[:, 1:5],
                             dtype=np.float64),
        np.ascontiguousarray(gt_objects[:, :4], dtype=np.float64))

    # Precision side: best overlap of each RoI with any ground-truth box.
    max_overlaps = np.amax(overlaps, axis=1)
    precision_correct = np.sum(max_overlaps >= thres)
    precision_total = max_overlaps.size

    # Recall side: best overlap of each ground-truth box with any RoI.
    overlaps_gt = np.amax(overlaps, axis=0)
    recall_correct = np.sum(overlaps_gt >= thres)
    recall_total = overlaps_gt.size
    return precision_correct, precision_total, recall_correct, recall_total
Beispiel #11
0
def cython_bbox_ious(atlbrs, btlbrs):
    """Compute pairwise overlaps with ``cython_bbox.bbox_overlaps``.

    Args:
        atlbrs: Sequence of boxes (one row per box).
        btlbrs: Sequence of boxes (one row per box).

    Returns:
        A ``len(atlbrs) x len(btlbrs)`` float64 array. When either input is
        empty a zero-sized array is returned and ``cython_bbox`` is never
        imported.

    Raises:
        Exception: Whatever importing ``cython_bbox`` raised, after logging
            an installation hint.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if ious.size == 0:
        return ious
    try:
        import cython_bbox
    except Exception:
        logger.error('cython_bbox not found, please install cython_bbox.'
                     'for example: `pip install cython_bbox`.')
        # Bare raise keeps the original traceback intact.
        raise

    return cython_bbox.bbox_overlaps(
        np.ascontiguousarray(atlbrs, dtype=np.float64),
        np.ascontiguousarray(btlbrs, dtype=np.float64))
Beispiel #12
0
def compute_pairwise_iou(a, b):
    """Computes one minus the pairwise overlap of the boxes in a and b.

    Despite the function name, the returned values are ``1 - overlap``, so
    identical boxes give 0 rather than 1.

    Args:
      a: np.ndarray; Array of N boxes in format x1y1x2y2.
      b: np.ndarray; Array of M boxes in format x1y1x2y2.

    Returns:
      np.ndarray; A NxM array where the entry at (i, j) is one minus the
        overlap that ``bbox_overlaps`` reports for box i from a and box j
        from b.
    """
    boxes_a = np.ascontiguousarray(a, dtype=np.float64)
    boxes_b = np.ascontiguousarray(b, dtype=np.float64)
    return 1 - bbox_overlaps(boxes_a, boxes_b)
Beispiel #13
0
def iou_distance(A, B):
    '''Compute the IoU distance between two groups of trajectories.

    Args:
        A (list of Trajectory): trajectory group A
        B (list of Trajectory): trajectory group B
    Returns:
        costs (numpy.ndarray): cost matrix of shape (len(A), len(B)) holding
            1 - IoU of the ``ltrb`` boxes; zero-sized when either group is
            empty.
    '''
    BA = [a.ltrb for a in A]
    BB = [b.ltrb for b in B]
    if len(A) == 0 or len(B) == 0:
        # Nothing to compare; skip the bbox_overlaps call entirely.
        # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
        return np.zeros((len(A), len(B)), dtype=np.float64)
    ious = bbox_overlaps(np.ascontiguousarray(BA, dtype=np.float64),
                         np.ascontiguousarray(BB, dtype=np.float64))
    return 1 - ious
Beispiel #14
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Randomly sample a fixed number of foreground and background RoIs.

    Args:
        all_rois: 2-D array of RoIs; columns 1-4 are the box coordinates.
        all_scores: Per-RoI scores, indexed like ``all_rois``.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the label.
        fg_rois_per_image: Upper bound on the number of foreground RoIs.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Passed through to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)`` for
        the sampled RoIs, foreground first and background after.

    Raises:
        ValueError: If no RoI qualifies as foreground or background.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: best overlap >= 0.5. Background: best overlap in [0.1, 0.5).
    fg_inds = np.where(max_overlaps >= 0.5)[0]
    bg_inds = np.where((max_overlaps < 0.5) & (max_overlaps >= 0.1))[0]

    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when there are too few candidates.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # ValueError is still caught by callers that catch Exception.
        raise ValueError('no foreground or background RoIs to sample from')

    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # Everything after the foreground samples is background: label 0.
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]
    bbox_target_data = _compute_targets_label(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes)
    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Beispiel #15
0
    def _remove_duplicate(self, updated, aged):
        if len(updated) == 0 or len(aged) == 0:
            return

        updated_bboxes = np.array([self.tracks[trk_id].tlbr for trk_id in updated])
        aged_bboxes = np.array([self.tracks[trk_id].tlbr for trk_id in aged])

        ious = bbox_overlaps(updated_bboxes, aged_bboxes)
        idx = np.where(ious >= self.duplicate_iou)
        dup_ids = set()
        for row, col in zip(*idx):
            updated_id, aged_id = updated[row], aged[col]
            if self.tracks[updated_id].start_frame <= self.tracks[aged_id].start_frame:
                dup_ids.add(aged_id)
            else:
                dup_ids.add(updated_id)
        for trk_id in dup_ids:
            LOGGER.debug('Duplicate: %s', self.tracks[trk_id])
            del self.tracks[trk_id]
Beispiel #16
0
def eval(boxes, label, scores, gt_bboxes, gt_label):
    """Collect frame-level and video-level (score, overlap) pairs.

    Args:
        boxes: Sequence of 2-D arrays, one per detection; column 0 is the
            frame index and columns 1-4 the box.
        label: Per-detection labels, compared against ``gt_label``.
        scores: Per-detection scores.
        gt_bboxes: 3-D array; ``gt_bboxes[:, :, 0]`` is the frame index and
            columns 1-4 of the last axis are the box.
        gt_label: Ground-truth label.

    Returns:
        ``(frame_det, video_det, gt_nums, gt_vid)``: two ``(K, 2)`` float
        arrays of ``[score, overlap]`` rows, ``gt_bboxes.size // 5`` and
        ``gt_bboxes.shape[0]``.
    """
    # Rows are gathered in lists and stacked once at the end; stacking inside
    # the loops re-copied the whole array on every append.
    frame_rows = []
    video_rows = []

    # range replaces the Python 2-only xrange.
    for i in range(len(boxes)):
        tube = boxes[i]
        score = scores[i]

        # A wrongly labelled detection contributes an extra zero-overlap row.
        if not (label[i] == gt_label):
            video_rows.append([score, 0])

        iou = 0
        for j in range(tube.shape[0]):
            frame_idx = tube[j, 0]
            curr_box = np.expand_dims(tube[j, 1:5], axis=0)
            curr_gt_idx = np.where(gt_bboxes[:, :, 0] == frame_idx)
            curr_gt = gt_bboxes[curr_gt_idx][:, 1:5]
            # np.float was removed in NumPy 1.24; float64 is what it aliased.
            overlaps = bbox_overlaps(
                np.ascontiguousarray(curr_box, dtype=np.float64),
                np.ascontiguousarray(curr_gt, dtype=np.float64)).max()
            frame_rows.append([score, overlaps])
            iou += overlaps

        # Frames after the last detected frame.
        # NOTE(review): the row records 0.93 while the running sum adds 0.83;
        # both values are kept as found - confirm which one is intended.
        for _ in range(int(gt_bboxes.shape[1] - tube[-1, 0] - 1)):
            frame_rows.append([score, 0.93])
            iou += 0.83

        # Frames before the first detected frame.
        for _ in range(int(tube[0, 0])):
            frame_rows.append([0, 1])

        video_rows.append([score, iou / (gt_bboxes.shape[1] - tube[0, 0])])

    frame_det = np.array(frame_rows, dtype=np.float64).reshape(-1, 2)
    video_det = np.array(video_rows, dtype=np.float64).reshape(-1, 2)

    # Floor division keeps the integer count the Python 2 original produced.
    gt_nums = gt_bboxes.size // 5
    gt_vid = gt_bboxes.shape[0]
    return frame_det, video_det, gt_nums, gt_vid
Beispiel #17
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as positive (1), negative (0) or don't-care (-1)
    from its overlap with ``gt_boxes``, subsamples both classes, and builds
    the regression targets and loss weights.

    Args:
        rpn_cls_score: Array whose axes 1 and 2 give the map height and width.
        gt_boxes: 2-D array of ground-truth boxes.
        im_info: Sequence with image height at index 0 and width at index 1.
        _feat_stride: Unused.
        all_anchors: 2-D array of anchors, columns ``x1, y1, x2, y2``.
        num_anchors: Number of anchors per map location.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)``; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Every anchor that ties for the best overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets towards each anchor's best-overlapping gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Beispiel #18
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Build RPN labels, regression targets and loss weights for one image.

    Anchors lying fully inside the image are labelled positive (1), negative
    (0) or don't-care (-1) from their overlap with ``gt_boxes``, using fixed
    thresholds of 0.3 / 0.7 and a batch of 256 with at most half positives.

    Args:
        rpn_cls_score: Array whose axes 1 and 2 give the map height and width.
        gt_boxes: 2-D array of ground-truth boxes.
        im_info: Sequence whose first element holds image height at index 0
            and width at index 1.
        _feat_stride: Unused.
        all_anchors: 2-D array of anchors, columns ``x1, y1, x2, y2``.
        num_anchors: Number of anchors per map location.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, height * A, width)``; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]
    _allowed_border = 0
    height, width = rpn_cls_score.shape[1:3]

    # Keep only the anchors that lie inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border)
        & (all_anchors[:, 3] < im_info[0] + _allowed_border))[0]
    anchors = all_anchors[inds_inside, :]

    # Start with every inside anchor as don't-care.
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Every anchor that ties for the best overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Negatives first so that the positive rules below can override them.
    labels[max_overlaps < 0.3] = 0
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps > 0.7] = 1

    # Subsample positives down to half of the 256-anchor batch.
    num_fg = int(256 * 0.5)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=len(fg_inds) - num_fg,
                                  replace=False)
        labels[disable_inds] = -1

    # Fill the rest of the batch with negatives.
    num_bg = 256 - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=len(bg_inds) - num_bg,
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets towards each anchor's best-overlapping gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only positive anchors get a non-zero inside weight.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))

    # Uniform outside weight over all sampled (non don't-care) anchors.
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # Map back up to the full anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    labels = labels.reshape((1, height, width, A)).transpose((0, 3, 1, 2))
    rpn_labels = labels.reshape((1, 1, height * A, width))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Beispiel #19
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are read as the box
            coordinates (column 0 is presumably a batch index -- confirm
            against the caller).
        all_scores: per-RoI scores, indexed in parallel with ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0:4 are the coordinates and
            column 4 is the class label (a plain label, not one-hot).
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: total number of RoIs to return.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)``. The sampled foreground RoIs come first,
        followed by the background RoIs; the labels of everything after the
        foreground part are forced to 0.

    Raises:
        ValueError: if no RoI qualifies as either foreground or background,
            so nothing can be sampled.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is used explicitly: the np.float alias was removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    # For every RoI: index of the best-matching gt box, that overlap value,
    # and the class label of that gt box.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: best overlap reaches FG_THRESH.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Background: best overlap lies in [BG_THRESH_LO, BG_THRESH_HI).
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled.
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Never ask for more foregrounds than are available, so the
        # foreground draw can be done without replacement.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        # Fill the remainder of the batch with backgrounds; repeats are
        # allowed only when there are too few distinct backgrounds.
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        # Only foregrounds exist: the whole batch is foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Only backgrounds exist: the whole batch is background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously this dropped into pdb; fail loudly instead so that
        # non-interactive training runs report the real problem.
        raise ValueError(
            '_sample_rois: no RoI qualifies as foreground or background')

    # The indices that we're selecting (both fg and bg), foregrounds first.
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Regression targets of each sampled RoI w.r.t. its assigned gt box.
    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    # Expand the compact targets into the per-class layout.
    bbox_targets, bbox_inside_weights = \
      _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Beispiel #20
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: per-image box arrays, indexed by image number.
                When None, the roidb boxes whose ``gt_classes`` entry is 0
                are used as the proposals.
            thresholds: IoU thresholds at which recall is computed.
                Defaults to 0.5 through 0.95 in steps of 0.05.
            area: name of the ground-truth area range to evaluate; must be
                one of the keys of ``areas`` below.
            limit: if given, only the first ``limit`` boxes of each image
                are considered.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            # Keep only the gt boxes whose area falls in the requested range.
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                # No proposals: the gt boxes counted above stay unmatched.
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 is spelled out: the np.float alias was removed from
            # NumPy and would raise AttributeError here.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Greedy one-to-one matching: repeatedly take the best remaining
            # (proposal, gt) pair and retire both.
            # NOTE(review): with fewer proposals than gt boxes every entry
            # ends up at -1 and the `gt_ovr >= 0` assertion below fires.
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
Beispiel #21
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    This variant has its hyper-parameters fixed in the code: negatives are
    anchors with max IoU below 0.3, positives reach 0.7 (or are the best
    match of some gt box), at most 256 anchors are labelled and at most half
    of them are positive. Labelled anchors are weighted uniformly.

    Args:
        rpn_cls_score: RPN score map; only ``shape[1:3]`` (height, width)
            is read.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` and
            ``_compute_targets``.
        im_info: ``im_info[0]`` is used as image height, ``im_info[1]`` as
            image width.
        _feat_stride: not used by this function.
        all_anchors: one anchor per row; columns 0..3 are compared against
            the image bounds.
        num_anchors: number of anchors per feature-map position (A).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels are reshaped to
        ``(1, 1, A * height, width)`` with 1 = positive, 0 = negative,
        -1 = don't care; the other three are reshaped to
        ``(1, height, width, A * 4)``.
    """
    # Hyper-parameters that are fixed for this variant of the layer.
    rpn_negative_overlap = 0.3
    rpn_positive_overlap = 0.7
    rpn_batchsize = 256
    rpn_fg_fraction = 0.5

    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: (ex, gt).
    # np.float64 replaces the np.float alias that was removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Best gt box (and its overlap) for every anchor.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Best overlap achieved for every gt box, then all anchors that tie
    # for that best overlap.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Negatives are assigned first so that the positive rules below can
    # overwrite them (positives are never clobbered in this variant).
    labels[max_overlaps < rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= rpn_positive_overlap] = 1

    # subsample positive labels if we have too many
    num_fg = int(rpn_fg_fraction * rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive ones have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))

    # uniform weighting of examples (given non-uniform sampling): positives
    # and negatives both get 1 / (number of labelled anchors).
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Beispiel #22
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
  """Assign RPN training labels and box-regression targets to every anchor.

  Anchors that are not fully inside the image are ignored: after the final
  un-mapping they carry label -1 and zero targets/weights. Among the inside
  anchors, positives (1) and negatives (0) are chosen by overlap with
  ``gt_boxes`` and randomly subsampled so that at most
  ``cfg.TRAIN.RPN_BATCHSIZE`` anchors stay labelled, of which at most
  ``RPN_FG_FRACTION * RPN_BATCHSIZE`` are positive; everything else is -1.

  Args:
    rpn_cls_score: RPN score map; only ``shape[1:3]`` (height, width) is
      read. (The original notes describe it as ``[:, :, :, 18]``.)
    gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` and
      ``_compute_targets``.
    im_info: ``im_info[0]`` is used as image height, ``im_info[1]`` as image
      width.
    _feat_stride: not used by this function.
    all_anchors: one anchor per row; columns 0..3 are compared against the
      image bounds.
    num_anchors: number of anchors per feature-map position (A).

  Returns:
    rpn_labels: labels reshaped to ``(1, 1, A * height, width)``;
      1 = positive, 0 = negative, -1 = don't care.
    rpn_bbox_targets: regression targets from ``_compute_targets``,
      reshaped to ``(1, height, width, A * 4)``.
    rpn_bbox_inside_weights: ``cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS`` for
      positives and 0 elsewhere, ``(1, height, width, A * 4)``.
    rpn_bbox_outside_weights: per-anchor normalisation weights,
      ``(1, height, width, A * 4)``.
  """
  A = num_anchors
  total_anchors = all_anchors.shape[0]

  # allow boxes to sit over the edge by a small amount
  _allowed_border = 0

  # map of shape (..., H, W)
  height, width = rpn_cls_score.shape[1:3]

  # only keep anchors inside the image
  # (np.where returns a tuple of index arrays; [0] takes the row indices,
  # e.g. np.where(np.array([1, 1, 1, 2, 3, 4, 5]) >= 2)[0] -> [3, 4, 5, 6])
  inds_inside = np.where(
    (all_anchors[:, 0] >= -_allowed_border) &
    (all_anchors[:, 1] >= -_allowed_border) &
    (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
    (all_anchors[:, 3] < im_info[0] + _allowed_border))[0]  # height

  # keep only inside anchors
  anchors = all_anchors[inds_inside, :]

  # label: 1 is positive, 0 is negative, -1 is dont care
  # np.empty leaves the memory uninitialised, so fill it explicitly.
  labels = np.empty((len(inds_inside),), dtype=np.float32)
  labels.fill(-1)

  # overlaps between the anchors and the gt boxes
  # overlaps (ex, gt): one row per inside anchor, one column per gt box.
  # np.float64 replaces the np.float alias that was removed from NumPy.
  overlaps = bbox_overlaps(
    np.ascontiguousarray(anchors, dtype=np.float64),
    np.ascontiguousarray(gt_boxes, dtype=np.float64))

  # Per anchor: index of its best gt box and the corresponding overlap.
  argmax_overlaps = overlaps.argmax(axis=1)
  max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

  # Per gt box: the best overlap any anchor achieves with it ...
  gt_argmax_overlaps = overlaps.argmax(axis=0)
  gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
  # ... and the row indices of every anchor that ties for that best overlap.
  gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

  if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels first so that positive labels can clobber them
    # first set the negatives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # fg label: for each gt, anchor with highest overlap
  labels[gt_argmax_overlaps] = 1

  # fg label: above threshold IOU
  labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

  if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels last so that negative labels can clobber positives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # subsample positive labels if we have too many
  num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
  fg_inds = np.where(labels == 1)[0]
  if len(fg_inds) > num_fg:
    disable_inds = npr.choice(
      fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    labels[disable_inds] = -1

  # subsample negative labels if we have too many
  # (the background budget is whatever the surviving positives leave over)
  num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
  bg_inds = np.where(labels == 0)[0]
  if len(bg_inds) > num_bg:
    disable_inds = npr.choice(
      bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    labels[disable_inds] = -1

  # Regression targets of every inside anchor w.r.t. its best gt box.
  bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

  bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  # only the positive ones have regression targets
  bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

  bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
  if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
  else:
    # An explicit positive weight must lie strictly between 0 and 1; the
    # remainder goes to the negatives.
    assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
            (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
    positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                        np.sum(labels == 1))
    negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                        np.sum(labels == 0))
  bbox_outside_weights[labels == 1, :] = positive_weights
  bbox_outside_weights[labels == 0, :] = negative_weights

  # map up to original set of anchors
  # (presumably _unmap scatters the inside-anchor rows back into arrays of
  # total_anchors rows and uses `fill` for the rest -- confirm in _unmap)
  labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
  bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
  bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
  bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

  # labels
  labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
  labels = labels.reshape((1, 1, A * height, width))
  rpn_labels = labels

  # bbox_targets
  bbox_targets = bbox_targets \
    .reshape((1, height, width, A * 4))
  rpn_bbox_targets = bbox_targets

  # bbox_inside_weights
  bbox_inside_weights = bbox_inside_weights \
    .reshape((1, height, width, A * 4))
  rpn_bbox_inside_weights = bbox_inside_weights

  # bbox_outside_weights
  bbox_outside_weights = bbox_outside_weights \
    .reshape((1, height, width, A * 4))
  rpn_bbox_outside_weights = bbox_outside_weights

  return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Beispiel #23
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as positive (1), negative (0) or don't-care (-1)
    from its overlap with ``gt_boxes`` and computes the box-regression
    targets and weights. Thresholds and batch sizes come from ``cfg.FLAGS``.

    Args:
        rpn_cls_score: RPN classification scores; only ``shape[1:3]``
            (height, width) is read.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` and
            ``_compute_targets``.
        im_info: indexed with ``[0]`` first; the resulting row supplies the
            image height at index 0 and the width at index 1.
        _feat_stride: not used by this function.
        all_anchors: one anchor per row; columns 0..3 are compared against
            the image bounds.
        num_anchors: number of anchors per feature-map position (A).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels are reshaped to
        ``(1, 1, A * height, width)``; the other three to
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image; anchors crossing the image
    # boundary are filtered out
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # (everything starts out as don't-care)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 replaces the np.float alias that was removed from NumPy.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # best overlap in every row, i.e. per anchor
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # best overlap in every column, i.e. per gt box, followed by the row
    # indices of all anchors that tie for it
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # An explicit positive weight must lie strictly between 0 and 1;
        # the remainder goes to the negatives.
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels: move the anchor axis forward, then fold it into the height
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Beispiel #24
0
def track(dets):
  """Link per-frame detections into scored traces, one label at a time.

  For every distinct predicted label, detections scoring above 0.1 are
  chained across consecutive frames with a Viterbi-style recurrence (the
  score of a link is ``beta * overlap`` plus the accumulated score), and the
  resulting traces are then consumed by repeated calls to ``nms``.

  Args:
    dets: sequence of per-frame dicts. Each dict is read through the keys
      'boxes', 'pred_label' and 'pred_scores' (column 0 of 'pred_scores' is
      the score that is used).

  Returns:
    Tuple ``(rrrr, llll, ssss)``: the first results of each ``nms`` call,
    the label each of them belongs to, and the stacked second results of
    ``nms`` (an empty list if there are no labels at all).
    NOTE(review): ``ssss`` is re-created for every label while ``rrrr`` and
    ``llll`` accumulate over all labels, so with several labels the three
    outputs are not aligned -- confirm whether that is intended.
  """
  beta = 0.5
  n = len(dets)
  depth = dets[0]['boxes'].shape[1]
  tmp = np.empty((0))
  for i in range(n):
    tmp = np.hstack((tmp, dets[i]['pred_label']))
  u_label = np.unique(tmp)

  rrrr = []
  llll = []
  ssss = []

  for l in u_label:
    valid_dets = []
    valid_score = []

    # Filter out negative samples.
    for i in range(n):
      inds = np.where(np.logical_and(dets[i]['pred_label'] == l,
                                     dets[i]['pred_scores'][:, 0] > 0.1))[0]
      valid_dets.append(dets[i]['boxes'][inds])
      valid_score.append(dets[i]['pred_scores'][inds, 0])

    det_traces = []
    det_scores = np.zeros((0,1))
    # Start every label from a clean state; otherwise these names are either
    # undefined or still hold the traces of the previous label.
    old_trace = []
    old_scores = np.zeros((0, 1))
    # Viterbi
    if valid_score[0].size > 0:
      old_scores = np.expand_dims(valid_score[0], axis=1)
      old_trace = []
      for i in range(old_scores.size):
        old_trace.append((i,))

    for i in range(1, n):
      if valid_dets[i - 1].size == 0 and valid_dets[i].size > 0:
        # A new run of detections starts in frame i.
        old_scores = np.expand_dims(valid_score[i], axis=1)
        old_trace = []
        for j in range(old_scores.size):
          # Trace entries encode (detection index + frame index * 100).
          old_trace.append((j + i * 100,))
      elif valid_dets[i-1].size > 0 and valid_dets[i].size == 0:
        # The current run ends: bank its traces and scores.
        det_traces = det_traces + old_trace
        det_scores = np.vstack((det_scores, old_scores))
        old_trace = []
        old_scores = np.zeros((0))
      elif valid_dets[i-1].size > 0 and valid_dets[i].size > 0:
        # NOTE(review): only the last column (depth - 1) of each box array is
        # passed to bbox_overlaps -- confirm this matches its expected input.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(valid_dets[i - 1][:, depth - 1], dtype=np.float64),
            np.ascontiguousarray(valid_dets[i][:, depth - 1], dtype=np.float64))
        scores = beta * overlaps + old_scores
        # Best predecessor for every detection of frame i.
        argmax_scores = scores.argmax(axis=0)
        old_scores = np.expand_dims(scores.max(axis=0) + valid_score[i], axis=1)
        trace = []
        for j in range(old_scores.size):
          trace.append(old_trace[argmax_scores[j]] + (j + i * 100,))
        old_trace = trace
    if len(old_trace) > 0:
      det_traces = det_traces + old_trace
      det_scores = np.vstack((det_scores, old_scores))

    boxes = []
    for i in range(len(det_traces)):
      # NOTE(review): the row count uses a literal 8 while the slices below
      # step by `depth` -- confirm the two are meant to agree.
      curr_boxes = np.empty((len(det_traces[i]) * 8, 5))
      for j in range(len(det_traces[i])):
        # Decode the (detection index, frame index) pair; floor division
        # keeps the frame index an integer on Python 3 as well.
        idx = det_traces[i][j] % 100
        ff = det_traces[i][j] // 100
        # NOTE(review): this reads dets[j] (position in the trace) rather
        # than dets[ff], and idx indexes the filtered detections -- verify.
        curr_boxes[j * depth : (j + 1) * depth, 1 : 5] = dets[j]['boxes'][idx]
        curr_boxes[j * depth : (j + 1) * depth, 0] = np.arange(depth) + ff * depth
      boxes.append(curr_boxes)

    ssss = np.empty((0, 1))
    while det_scores.size > 0:
      [r, s, boxes, det_scores, det_traces] = nms(boxes, det_scores, det_traces)
      rrrr.append(r)
      llll.append(l)
      ssss = np.vstack((ssss, s))
  return rrrr, llll, ssss
Beispiel #25
0
def anchor_target_layer(
        gt_boxes, img_shape, all_anchors, is_restrict_bg=False):
    """Label anchors for RPN training and compute their regression targets.

    Same as the anchor target layer in the original Fast/er R-CNN.  Anchors
    lying inside the image are marked positive (1), negative (0) or
    "don't care" (-1) from their IoU with the ground-truth boxes, the
    labelled anchors are subsampled down to the minibatch budget read from
    ``cfgs``, and the offsets from every anchor to its best-overlapping
    ground-truth box are computed.

    :param gt_boxes: [-1, 5]: [xmin, ymin, xmax, ymax, label], the
        ground-truth boxes
    :param img_shape: [1, h, w, 3]
    :param all_anchors: [-1, 4]: [xmin, ymin, xmax, ymax]
    :param is_restrict_bg: if True, the background budget is raised to at
        least 1.5 times the foreground quota
    :return: rpn_labels [-1, 1], rpn_bbox_targets [-1, 4]
    """
    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image: first collect the indices of the
    # anchors that satisfy all four border conditions ...
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)  # height
    )[0]
    # ... then slice those anchors out
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # every anchor starts out as "don't care"
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes:
    # one row per anchor, one column per gt box.
    # np.float64 is used instead of the np.float alias, which newer NumPy
    # releases no longer provide.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # argmax_overlaps: for each anchor, index of the gt box it overlaps most
    argmax_overlaps = overlaps.argmax(axis=1)

    # max_overlaps: for each anchor, the value of that largest overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # gt_argmax_overlaps: for each gt box, index of the anchor it overlaps most
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    # gt_max_overlaps: for each gt box, the value of that largest overlap
    gt_max_overlaps = overlaps[
        gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # gt_argmax_overlaps: every anchor that reaches the best overlap of some
    # gt box (ties included)
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # assign negatives first so that positives can overwrite them
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # assign negatives last so that they can overwrite positives
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # subsample positives if there are more than the foreground quota
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # the remaining minibatch slots go to negatives
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        # Cast back to int: the 1.5 factor yields a float, which cannot be
        # used as the sample size handed to npr.choice below.
        num_bg = int(max(num_bg, num_fg * 1.5))
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # convert to offsets towards the best-overlapping gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # labels = labels.reshape((1, height, width, A))
    rpn_labels = labels.reshape((-1, 1))

    # bbox_targets
    bbox_targets = bbox_targets.reshape((-1, 4))
    rpn_bbox_targets = bbox_targets

    return rpn_labels, rpn_bbox_targets
Beispiel #26
0
def check_relationship_recall(gt_objects,
                              gt_relationships,
                              cls_r,
                              inds_s,
                              inds_o,
                              boxes_s,
                              boxes_o,
                              top_Ns,
                              thres=0.5,
                              use_gt_boxes=True,
                              union_overlap=True):
    """Count how many ground-truth relationships are recalled.

    A ground-truth relationship is recalled when some candidate
    (subject, object) pair is matched to it and one of the relationship
    classes predicted for that candidate equals the ground-truth label.
    How a candidate is matched depends on the mode:

    * ``use_gt_boxes``: subject and object overlaps are both exactly 1;
      object classes are not compared.
    * ``union_overlap`` (and not ``use_gt_boxes``): the union box overlaps
      the ground-truth union box by at least ``thres`` and both object
      classes agree.
    * otherwise: subject and object boxes each overlap their ground truth
      by at least ``thres`` and both object classes agree.

    Every ground-truth relationship contributes at most once per entry of
    ``top_Ns``, whichever mode is active, so the returned counts never
    exceed ``rel_cnt``.

    Args:
        gt_objects: 2-D array; columns 0-3 are used as the box and column 4
            as the object class.
        gt_relationships: 2-D array indexed by (subject, object); entries
            greater than 0 are the relationship labels.
        cls_r: sequence parallel to ``top_Ns``; ``cls_r[k]`` is a 2-D array
            holding, per candidate row, the predicted relationship classes.
        inds_s: per-candidate subject class.
        inds_o: per-candidate object class.
        boxes_s: per-candidate subject boxes.
        boxes_o: per-candidate object boxes.
        top_Ns: one entry per evaluated setting; only its length is used
            here.
        thres: overlap threshold for the two box-overlap modes.
        use_gt_boxes: select the exact-box mode.
        union_overlap: select the union-box mode when ``use_gt_boxes`` is
            False.

    Returns:
        Tuple ``(rel_cnt, rel_correct_cnt)``: the number of ground-truth
        relationships and an array with the recalled count per ``top_Ns``
        entry.
    """
    def box_union(box1, box2):
        # Element-wise minimum of the first two columns and maximum of the
        # remaining ones, i.e. the box enclosing both inputs.
        return np.concatenate((np.minimum(
            box1[:, :2], box2[:, :2]), np.maximum(box1[:, 2:], box2[:, 2:])),
                              1)

    boxes_union = box_union(boxes_s, boxes_o)
    # rearrange the ground truth
    gt_rel_sub_idx, gt_rel_obj_idx = np.where(
        gt_relationships > 0)  # ground truth number
    gt_sub = gt_objects[gt_rel_sub_idx, :5]
    gt_obj = gt_objects[gt_rel_obj_idx, :5]
    gt_rel = gt_relationships[gt_rel_sub_idx, gt_rel_obj_idx]

    gt_union = box_union(gt_sub, gt_obj)

    rel_cnt = len(gt_rel)
    rel_correct_cnt = np.zeros(len(top_Ns))

    # np.float64 replaces the np.float alias, which newer NumPy releases no
    # longer provide.
    sub_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_s[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_sub[:, :4], dtype=np.float64))
    obj_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_o[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_obj[:, :4], dtype=np.float64))
    union_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_union[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_union[:, :4], dtype=np.float64))

    # located[c, g] is True when candidate c is spatially matched to ground
    # truth g under the selected mode.
    if use_gt_boxes:
        located = np.logical_and(sub_overlaps == 1, obj_overlaps == 1)
    elif union_overlap:
        located = union_overlaps >= thres
    else:
        located = np.logical_and(sub_overlaps >= thres,
                                 obj_overlaps >= thres)
    # Object classes are only compared when the boxes are not ground truth.
    check_labels = not use_gt_boxes

    for idx in range(len(top_Ns)):
        predictions = cls_r[idx]
        for gt_id in range(rel_cnt):
            for candidate_id in np.where(located[:, gt_id])[0]:
                if check_labels and not (
                        inds_s[candidate_id] == gt_sub[gt_id, 4] and
                        inds_o[candidate_id] == gt_obj[gt_id, 4]):
                    continue
                if any(predictions[candidate_id, cls_id] == gt_rel[gt_id]
                       for cls_id in range(predictions.shape[1])):
                    rel_correct_cnt[idx] += 1
                    # Stop at the first hit so that one ground-truth
                    # relationship is never counted more than once.
                    break

    return rel_cnt, rel_correct_cnt
Beispiel #27
0
def imdb_rpn_compute_stats(net,
                           imdb,
                           anchor_scales=(8, 16, 32),
                           feature_stride=16):
    """Compute the mean and std of RPN box-regression targets over a dataset.

    For every image of the filtered roidb the anchors inside the image are
    generated, the foreground anchors are selected (overlap with a
    ground-truth box of at least ``cfg.TRAIN.RPN_POSITIVE_OVERLAP``, or best
    match of some ground-truth box) and their regression targets are
    accumulated.

    Args:
        net: network object exposing ``blobs`` ('data', 'im_info',
            'rpn/output') and ``forward``; it is run once per square input
            size to learn the feature-map size produced for that size.
        imdb: dataset object exposing ``roidb`` and ``num_images``.
        anchor_scales: scales handed to ``generate_anchors``.
        feature_stride: pixel stride between neighbouring feature-map cells.

    Returns:
        Tuple ``(means, stds)`` of the accumulated targets, per coordinate.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # Only the resulting blob shape matters; the outputs are discarded.
        net.forward(data=blobs['data'].astype(np.float32, copy=False),
                    im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        # bring the ground truth to the scale of the resized image
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= 0)
                               & (all_anchors[:, 1] >= 0)
                               & (all_anchors[:, 2] < im_info[0, 1]) &  # width
                               (all_anchors[:, 3] < im_info[0, 0])  # height
                               )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float64 replaces the np.float alias, which newer NumPy
        # releases no longer provide.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets**2).sum(axis=0)
        counts += targets.shape[0]

    # Var[x] = E[x^2] - E[x]^2
    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means**2)
    print(means)
    print(stds)
    return means, stds
Beispiel #28
0
    def forward(self, bottom, top):
        """Produce RPN training labels, box targets and loss weights.

        bottom[0] supplies the (..., H, W) map whose spatial size defines
        the anchor grid (only its shape is read), bottom[1] the ground-truth
        boxes (x1, y1, x2, y2, label) and bottom[2] the image info row
        (height, width, scale).  The results are written to top[0] (labels),
        top[1] (bbox targets), top[2] (bbox inside weights) and top[3]
        (bbox outside weights).
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
            print('rpn: gt_boxes {}'.format(gt_boxes))

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]

        if DEBUG:
            print('total_anchors {}'.format(total_anchors))
            print('inds_inside {}'.format(len(inds_inside)))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape {}'.format(anchors.shape))

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the np.float alias, which newer NumPy
        # releases no longer provide.
        gt_boxes = gt_boxes.reshape(gt_boxes.shape[0], gt_boxes.shape[1])
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # per anchor: best gt box and the overlap with it
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # per gt box: best overlap, then every anchor reaching it (ties kept)
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        # regression targets towards the best-overlapping gt box
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        # inside weights are non-zero for positive anchors only
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            # running statistics of the positive targets seen so far
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
            print('rpn: num_positive {}'.format(np.sum(labels == 1)))
            print('rpn: num_negative {}'.format(np.sum(labels == 0)))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
            print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Beispiel #29
0
    def forward(self,bottom,top):
        """Load one image, build anchor training tags and fill the top blobs.

        Steps, in order:
          1. fetch an image and its boxes, apply a random zoom-out, resize
             and min/max-normalise the image into top[0];
          2. rescale the boxes (xmin, ymin, w, h) to the resized image;
          3. lay out one anchor per sliding-window size and feature-map cell
             and compute the IoU of every anchor with every box;
          4. split the anchors into positives and negatives by IoU and make
             sure every box owns at least one positive anchor;
          5. write the regression tags to top[1], the sampled anchors to
             top[2] and the rescaled boxes to top[3].

        NOTE(review): the feature-map sizes and anchor offsets are computed
        with ``/`` and then used as array sizes and indices, so this
        presumably relies on Python 2 integer division -- confirm before
        porting.
        """
        #load image
        # (img_fn, tag_fn) = self.get_next_image()
        (img, bbs) = self.get_next_image()

        #print img_fn
        # img = misc.imread(img_fn)
        (img,pos,zoom_ratio) = random_zoomout(img)

        # size after zoom-out but before resizing; needed to rescale the boxes
        img_height = np.shape(img)[0]
        img_width = np.shape(img)[1] 
        img = misc.imresize(img,(self.resize_height, self.resize_width))
        minv = np.min(img)
        maxv = np.max(img)
        if minv == maxv:
            # constant image: avoid dividing by zero, use an all-zero input
            norm_img = np.zeros((self.resize_height, self.resize_width, 3), dtype=np.float32)
        else:
            # min/max normalisation into [-0.5, 0.5]
            norm_img = (np.float32(img) - minv) / (maxv - minv) - 0.5

        if len(norm_img.shape)==2:
            # single-channel image: fill channel 0 only
            top[0].data[0,0,:,:]=norm_img
        else:
            # HWC -> CHW
            top[0].data[0,:,:,:]=np.transpose(norm_img, (2,0,1))
        # 0 xmin 1 ymin 2 w 3 h 

        # apply the zoom-out to the boxes (bbs is modified in place)
        bbs[:,0] = bbs[:,0]*zoom_ratio + pos[0]
        bbs[:,1] = bbs[:,1]*zoom_ratio + pos[1]
        bbs[:,2] = bbs[:,2]*zoom_ratio
        bbs[:,3] = bbs[:,3]*zoom_ratio        

        # apply the resize to the boxes
        bbs[:,0] = bbs[:,0]*self.resize_width/img_width
        bbs[:,2] = bbs[:,2]*self.resize_width/img_width
        bbs[:,1] = bbs[:,1]*self.resize_height/img_height
        bbs[:,3] = bbs[:,3]*self.resize_height/img_height

        #compute all ious  
        feature_map_height = self.resize_height / self.sliding_window_stride
        feature_map_width = self.resize_width / self.sliding_window_stride
        size_num = len(self.sliding_window_height)

        # anchors as (x1, y1, x2, y2), ordered by size, then row, then column
        anchor_bbs = np.zeros((size_num*feature_map_height*feature_map_width,4),dtype = np.float64)
        for size_index in range(size_num):
            h=self.sliding_window_height[size_index]
            w=self.sliding_window_width[size_index]
            # left edges of one whole row of anchors of this size
            xs = np.arange(feature_map_width) * self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
            for y_index in range(feature_map_height):                  
                y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
                # flat index of the first anchor of this row
                ind = size_index*feature_map_height*feature_map_width + y_index*feature_map_width
                anchor_bbs[ind : ind + feature_map_width,0] = xs
                anchor_bbs[ind : ind + feature_map_width,2] = xs + w
                anchor_bbs[ind : ind + feature_map_width,1] = y
                anchor_bbs[ind : ind + feature_map_width,3] = y + h

        # boxes converted from (xmin, ymin, w, h) to (x1, y1, x2, y2)
        bbs2 =  np.zeros((len(bbs),4), dtype = np.float64)
        bbs2[:,0:2] = bbs[:,0:2]
        bbs2[:,2:4] = bbs[:,0:2] + bbs[:,2:4]    
        # iou[a, b]: overlap of anchor a with box b
        iou = cython_bbox.bbox_overlaps(anchor_bbs,bbs2)                


        #anchor box and gt box assignment
        pos_anchor=list()
        anchor_fired_bbs = list()        
        neg_anchor=list()
        # 1 for every box that received at least one positive anchor
        bbs_fire_list = np.zeros(len(bbs),dtype=np.int8)
        for size_index in range(size_num):
            h=self.sliding_window_height[size_index]
            w=self.sliding_window_width[size_index] 
            for y_index in range(feature_map_height):
                y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
                for x_index in range(feature_map_width):
                    x=x_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
                    anchor_box = [x,y,w,h, x_index, y_index, size_index]                      
                    anchor_index = size_index*feature_map_height*feature_map_width + y_index*feature_map_width + x_index
                    fired_bb = np.where(iou[anchor_index,:] > self.iou_positive_thres)[0]
                    max_iou = np.max(iou[anchor_index,:])                                                
                    if max_iou < self.iou_negative_thres:
                        neg_anchor.append(anchor_box)
                    elif max_iou > self.iou_positive_thres:
                        pos_anchor.append(anchor_box)
                        # pick one of the sufficiently overlapping boxes at random
                        bb_ind = int(fired_bb[np.random.randint(len(fired_bb))])
                        anchor_fired_bbs.append(bb_ind)
                        bbs_fire_list[bb_ind] = 1
                    # anchors between the two thresholds are ignored


        # give every still unassigned box its best-overlapping anchor
        for j in range(len(bbs)):
            if bbs_fire_list[j] > 0:
                continue #this gt bb has been assigned an anchor box
#             print 'bbs[%d] is un-assigned' % j
            max_iou_anchor_ind = np.argmax(iou[:,j])
            # decode the flat anchor index back into (size, row, column)
            size_index = max_iou_anchor_ind / (feature_map_height*feature_map_width)            
            y_index = (max_iou_anchor_ind % (feature_map_height*feature_map_width) ) / feature_map_width
            x_index = max_iou_anchor_ind % feature_map_width
            h=self.sliding_window_height[size_index]
            w=self.sliding_window_width[size_index]
            x=x_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
            y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
            anchor_box = [x,y,w,h, x_index, y_index, size_index]
            pos_anchor.append(anchor_box)
            anchor_fired_bbs.append(j)

        pos_anchor = np.array(pos_anchor)
        anchor_fired_bbs = np.array(anchor_fired_bbs)
        neg_anchor = np.array(neg_anchor)


        #sampling from pos_anchor and neg_anchor
        # one row per batch slot: [x, y, w, h, x_index, y_index, size_index]
        sampling_param = np.zeros([self.batch_size, 7], dtype=np.float32)
        # five channels per anchor size: flag, dx, dy, log(w ratio), log(h ratio)
        tags = np.zeros([1, 5*len(self.sliding_window_width),feature_map_height,feature_map_width],dtype=np.float32)

        # shuffle positives and their boxes with the same permutation
        rnd_perm = np.random.permutation(len(pos_anchor))        
        pos_anchor = pos_anchor[rnd_perm]
        anchor_fired_bbs = anchor_fired_bbs[rnd_perm]
        neg_anchor = np.random.permutation(neg_anchor)

        pos_num_in_batch = min([self.batch_size,len(pos_anchor)])

        for i in range(pos_num_in_batch):
            x = pos_anchor[i][0]
            y = pos_anchor[i][1]
            w = pos_anchor[i][2]
            h = pos_anchor[i][3]
            x_index = pos_anchor[i][4]
            y_index = pos_anchor[i][5]
            size_index = pos_anchor[i][6]
            # channel 0: this anchor is positive
            tags[0,0+5*size_index,y_index,x_index]=1.0
            gt = bbs[anchor_fired_bbs[i]]
            # channels 1-2: centre offset in units of the anchor size
            tags[0,1+5*size_index,y_index,x_index]=(gt[0] + 0.5*gt[2] - x - 0.5*w) / w
            tags[0,2+5*size_index,y_index,x_index]=(gt[1] + 0.5*gt[3] - y - 0.5*h) / h
            # channels 3-4: log ratio of box size to anchor size
            tags[0,3+5*size_index,y_index,x_index]=np.log(np.float32(gt[2])/w)
            tags[0,4+5*size_index,y_index,x_index]=np.log(np.float32(gt[3])/h)
            sampling_param[i,:] = pos_anchor[i]


        # fill the remaining batch slots with negatives, cycling through them
        if pos_num_in_batch < self.batch_size:
            neg_anchor_num = len(neg_anchor)
            for i in range(pos_num_in_batch,self.batch_size):
                sampling_param[i,:] = neg_anchor[(i - pos_num_in_batch) % neg_anchor_num]

        # log the anchor counts on roughly one call in fifty
        if np.random.randint(50)==0:
            print '[%s] pos_anchor: %d, neg_anchor:%d' % (self.py_fn, len(pos_anchor), len(neg_anchor))
        top[1].data[...]=tags    
        top[2].data[...]=sampling_param   

        top[3].data[...]=bbs
        self.iter += 1