예제 #1
0
def image_eval(pred, gt, ignore, iou_thresh):
    """Evaluate the detections of a single image against its ground truth.

    Both inputs are copied; the copies have columns 2 and 3 added to
    columns 0 and 1 (an ``x, y, w, h`` layout is assumed) before the
    overlaps are computed with ``bbox_overlaps``.

    Args:
        pred: ``(P, 5)`` array. Columns 0-3 describe the box; the fifth
            column is not read here.
        gt: ``(G, 4)`` array of ground-truth boxes.
        ignore: indexable of length ``G``. A gt whose entry equals 0 is
            excluded: a detection matching it is flagged ``-1`` and the gt
            never counts towards recall.
        iou_thresh: minimum overlap for a detection to match its best gt.

    Returns:
        ``(pred_recall, proposal_list)``, both of length ``P``.
        ``pred_recall[h]`` is the number of distinct non-excluded gt boxes
        matched by detections ``0..h``; ``proposal_list[h]`` is ``-1`` when
        detection ``h`` matched an excluded gt and ``1`` otherwise.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    num_pred = _pred.shape[0]
    pred_recall = np.zeros(num_pred)
    proposal_list = np.ones(num_pred)

    # Without ground truth nothing can be matched, and max()/argmax() over
    # an empty overlap row would raise.
    if _gt.shape[0] == 0:
        return pred_recall, proposal_list

    recall_list = np.zeros(_gt.shape[0])

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    # A gt only ever goes 0 -> 1 (the excluded ones go to -1 and can never
    # be 1), so a running counter equals the number of entries equal to 1.
    recalled = 0
    for h in range(num_pred):
        gt_overlap = overlaps[h]
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
        if max_overlap >= iou_thresh:
            if ignore[max_idx] == 0:
                recall_list[max_idx] = -1
                proposal_list[h] = -1
            elif recall_list[max_idx] == 0:
                recall_list[max_idx] = 1
                recalled += 1
        pred_recall[h] = recalled
    return pred_recall, proposal_list
예제 #2
0
def proposal_target_layer(rpn_bbox, rpn_cls_prob, gt_boxes, num_classes):
    """Sample foreground/background proposals and build their targets.

    The ground-truth boxes are appended to the proposals with a confidence
    of 1. Proposals are then split by confidence: ``>= 0.5`` is foreground,
    ``[0.1, 0.5)`` is background, and each group is randomly subsampled to
    fit the module-level ``BATCH_SIZE`` / ``FG_RATIO`` budget.

    Args:
        rpn_bbox: ``(P, 4)`` proposal boxes.
        rpn_cls_prob: ``(P, C)`` array; column 1 is read as the confidence.
        gt_boxes: ``(G, K)`` array whose last column is the class id and
            whose remaining columns are the box coordinates.
        num_classes: number of object classes (background excluded).

    Returns:
        ``(labels, bbox_reg, rpn_bbox)`` where the sampled foreground rows
        come first and the background rows follow. ``labels`` holds
        ``class id + 1`` for foreground and 0 for background. ``bbox_reg``
        has shape ``(len(labels), (num_classes + 1) * 4)`` and is non-zero
        only in the 4-column slot of each foreground row's label.
    """
    confidence_scores = rpn_cls_prob[:, 1]
    gt_coords = gt_boxes[:, 0:-1]

    # Add ground truth boxes as part of proposals
    rpn_bbox = np.vstack([rpn_bbox, gt_coords])
    # np.concatenate takes ONE sequence of arrays; a second positional
    # argument would be interpreted as the axis.
    confidence_scores = np.concatenate(
        [confidence_scores, np.ones(gt_boxes.shape[0], np.float32)])

    # Sample objects and backgrounds. When a group already fits its budget
    # all of its members are kept.
    fg_cnt = int(BATCH_SIZE * FG_RATIO)
    pos_inds = np.where(confidence_scores >= 0.5)[0]
    if len(pos_inds) > fg_cnt:
        pos_inds = np.random.choice(pos_inds, size=fg_cnt, replace=False)
    bg_cnt = BATCH_SIZE - len(pos_inds)
    neg_inds = np.where((confidence_scores >= 0.1)
                        & (confidence_scores < 0.5))[0]
    if len(neg_inds) > bg_cnt:
        neg_inds = np.random.choice(neg_inds, size=bg_cnt, replace=False)

    pos_bbox = rpn_bbox[pos_inds]
    overlaps = bbox.bbox_overlaps(pos_bbox, gt_coords)
    argmax_overlaps = np.argmax(overlaps, axis=-1)
    # Integer labels are required below, where they are used as slice bounds.
    pos_labels = gt_boxes[argmax_overlaps, -1].astype(np.int32) + 1
    neg_labels = np.zeros(len(neg_inds), np.int32)
    labels = np.concatenate([pos_labels, neg_labels])

    bbox_reg = np.zeros([len(labels), (num_classes + 1) * 4], np.float32)
    # Targets are computed for the sampled positives only, so that row i of
    # the result lines up with pos_labels[i].
    pos_targets = bbox.bbox_transform(pos_bbox, gt_coords[argmax_overlaps])
    for i, cls in enumerate(pos_labels):
        bbox_reg[i, cls * 4:(cls + 1) * 4] = pos_targets[i]

    neg_bbox = rpn_bbox[neg_inds]
    rpn_bbox = np.vstack([pos_bbox, neg_bbox])

    return labels, bbox_reg, rpn_bbox
예제 #3
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image proposal boxes.

        Args:
            box_list: one box array per image; ``len(box_list)`` must equal
                ``self.num_images``.
            gt_roidb: optional ground-truth roidb. When given, entry ``i``
                supplies ``'boxes'`` and ``'gt_classes'`` for image ``i``.

        Returns:
            A list with one dict per image holding ``'boxes'``,
            ``'gt_classes'`` (all zeros), ``'gt_overlaps'`` (a CSR matrix of
            shape ``(num_boxes, self.num_classes)``), ``'flipped'``
            (``False``) and ``'seg_areas'`` (all zeros). In
            ``'gt_overlaps'`` each box with a positive best overlap stores
            that overlap in the column of the class of its best gt box.
        """
        assert len(box_list) == self.num_images, \
          'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float was only an alias of the builtin float and was
                # removed in NumPy 1.24; np.float64 is the same dtype.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                hit = np.where(maxes > 0)[0]
                overlaps[hit, gt_classes[argmaxes[hit]]] = maxes[hit]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes':
                boxes,
                'gt_classes':
                np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps':
                overlaps,
                'flipped':
                False,
                'seg_areas':
                np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
예제 #4
0
def imgEval2(pred, gt, iou_thresh, finalHolder):
    """Record, for every detection, whether some gt box picked it as best.

    Boxes are converted on copies by adding columns 0/1 to columns 2/3 (an
    ``x, y, w, h`` layout is assumed). Every gt box then selects the
    detection it overlaps most; that detection is flagged as matched and
    keeps the largest overlap any selecting gt box had with it.

    Args:
        pred: ``(P, 5)`` array; column 4 is copied to the output as the
            detection's score.
        gt: ``(G, 4)`` array of ground-truth boxes.
        iou_thresh: accepted for interface compatibility; currently unused.
        finalHolder: list-like that receives one
            ``[score, matched_flag, best_overlap]`` entry per detection.

    Returns:
        None. Results are appended to ``finalHolder``.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    num_pred = _pred.shape[0]

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    # Transposed so that each row belongs to one gt box.
    overlaps = bbox_overlaps(_pred[:, :4], _gt).T

    # Column 5: matched flag, column 6: best overlap. Float zeros keep the
    # overlap from being truncated when pred has an integer dtype, and the
    # 2-D shape also works for zero detections.
    _p = np.concatenate((pred, np.zeros((num_pred, 2))), axis=1)

    for h in range(_gt.shape[0]):
        pred_overlap = overlaps[h]
        max_overlap, max_idx = pred_overlap.max(), pred_overlap.argmax()
        _p[max_idx][5] = 1
        if max_overlap >= _p[max_idx][6]:
            _p[max_idx][6] = max_overlap

    for h in range(num_pred):
        finalHolder.append([_p[h][4], _p[h][5], _p[h][6]])
예제 #5
0
    def _sample_rois(self, all_rois, gt_boxes, fg_rois_per_image,
                     rois_per_image, num_classes):
        """Generate a random sample of RoIs comprising foreground and
        background examples.

        Args:
            all_rois: array whose columns 1-4 hold the box coordinates.
            gt_boxes: array whose columns 0-3 hold the box coordinates and
                whose column 4 holds the class label.
            fg_rois_per_image: upper bound on sampled foreground RoIs.
            rois_per_image: total number of RoIs wanted per image.
            num_classes: forwarded to ``_get_bbox_regression_labels``.

        Returns:
            ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
            sampled RoIs, foreground first. Labels of the background RoIs
            are set to 0.
        """
        # overlaps: (rois x gt_boxes)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

        # Select foreground RoIs as those with >= FG_THRESH overlap
        fg_inds = np.where(max_overlaps >= self.FG_THRESH)[0]
        # Guard against the case when an image has fewer than
        # fg_rois_per_image foreground RoIs. The int() cast matters when the
        # caller passes a float budget: the count is used as a sample size
        # and as a slice bound, both of which require an integer.
        fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
        # Sample foreground regions without replacement
        if fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds,
                                 size=fg_rois_per_this_image,
                                 replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((max_overlaps < self.BG_THRESH_HI)
                           & (max_overlaps >= self.BG_THRESH_LO))[0]
        # Compute number of background RoIs to take from this image (guarding
        # against there being fewer than desired)
        bg_rois_per_this_image = int(
            min(rois_per_image - fg_rois_per_this_image, bg_inds.size))
        # Sample background regions without replacement
        if bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds,
                                 size=bg_rois_per_this_image,
                                 replace=False)

        # The indices that we're selecting (both fg and bg)
        keep_inds = np.append(fg_inds, bg_inds)
        # Select sampled values from various arrays:
        labels = labels[keep_inds]
        # Clamp labels for the background RoIs to 0
        labels[fg_rois_per_this_image:] = 0
        rois = all_rois[keep_inds]

        bbox_target_data = self._compute_targets(
            rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

        bbox_targets, bbox_inside_weights = \
            self._get_bbox_regression_labels(bbox_target_data, num_classes)

        return labels, rois, bbox_targets, bbox_inside_weights
예제 #6
0
    def _calc_overlaps(self, anchors, gt_boxes, inds_inside):
        """Compute anchor/gt overlaps and the best matches in both directions.

        Args:
            anchors: anchor boxes, one per row.
            gt_boxes: ground-truth boxes, one per row.
            inds_inside: only its length is used, as the number of overlap
                rows to index. NOTE(review): presumably the indices of the
                anchors kept by the caller - confirm it always matches
                ``len(anchors)``.

        Returns:
            ``(argmax_overlaps, max_overlaps, gt_max_overlaps,
            gt_argmax_overlaps)``: for every anchor the index of and overlap
            with its best gt box, for every gt box its best overlap, and the
            row indices of all anchors that reach a gt box's best overlap
            (ties included).
        """
        # overlaps (ex, gt)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Re-derive the anchor indices by value so that every anchor tying
        # for a gt box's best overlap is included, not just the first one.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        return argmax_overlaps, max_overlaps, gt_max_overlaps, \
            gt_argmax_overlaps
def image_eval(pred, gt, ignore, iou_thresh):
    """Match one image's detections to its ground truth.

    Works on copies of the inputs. Columns 2 and 3 of both arrays are
    shifted by columns 0 and 1 (an ``x, y, w, h`` layout is assumed) and the
    pairwise overlaps are obtained from ``bbox_overlaps``.

    Args:
        pred: ``(P, 5)`` array; only the first four columns are read.
        gt: ``(G, 4)`` array of ground-truth boxes.
        ignore: indexable of length ``G``; an entry equal to 0 marks a gt
            box that is excluded from recall.
        iou_thresh: minimum overlap for a detection to match its best gt.

    Returns:
        ``(pred_recall, proposal_list)`` of length ``P``: the cumulative
        count of distinct non-excluded gt boxes matched so far, and a flag
        that is ``-1`` for detections matching an excluded gt, else ``1``.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    pred_recall = np.zeros(_pred.shape[0])
    proposal_list = np.ones(_pred.shape[0])

    if _gt.shape[0] == 0:
        # No gt boxes: there is nothing to match and taking the max of an
        # empty overlap row would raise.
        return pred_recall, proposal_list

    recall_list = np.zeros(_gt.shape[0])

    for arr in (_pred, _gt):
        arr[:, 2] = arr[:, 2] + arr[:, 0]
        arr[:, 3] = arr[:, 3] + arr[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    # Entries of recall_list become 1 only from 0 and never leave 1, so
    # counting the transitions equals counting the entries equal to 1.
    num_recalled = 0
    for h in range(_pred.shape[0]):
        gt_overlap = overlaps[h]
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
        if max_overlap >= iou_thresh:
            if ignore[max_idx] == 0:
                recall_list[max_idx] = -1
                proposal_list[h] = -1
            elif recall_list[max_idx] == 0:
                recall_list[max_idx] = 1
                num_recalled += 1
        pred_recall[h] = num_recalled
    return pred_recall, proposal_list
예제 #8
0
def _save_boxes_overlay(image, boxes, out_path):
    """Draw each row of ``boxes`` (corner form) on a copy of ``image`` and
    write the result to ``out_path``."""
    canvas = image.copy()
    for box in boxes:
        p1 = int(box[0]), int(box[1])
        p2 = int(box[2]), int(box[3])
        cv2.rectangle(canvas, p1, p2, (0, 0, 255), thickness=1,
                      lineType=cv2.LINE_AA)
    cv2.imwrite(out_path, canvas)


def image_eval(pred, gt, iou_thresh, name, output, _match, _least):
    """Write gt/prediction overlays for images with a given match count.

    Detections whose best overlap reaches ``iou_thresh`` are counted. When
    the count equals ``_match`` and the image has more than ``_least`` gt
    boxes, the image is loaded and two copies are written: one with the gt
    boxes and one with the detections drawn on it.

    Args:
        pred: ``(P, 5)`` array; the first four columns describe the box (an
            ``x, y, w, h`` layout is assumed).
        gt: ``(G, 4)`` array of ground-truth boxes.
        iou_thresh: minimum overlap for a detection to count as a match.
        name: nested sequence; ``name[0][0]`` is the image stem, whose part
            before the first ``"_"`` is looked up in ``event_dir``.
        output: prefix for the written files. It is concatenated as is, so
            include a trailing separator when a directory is meant.
        _match: number of matches an image must have to be dumped.
        _least: an image is dumped only with more gt boxes than this.

    Returns:
        None.

    Raises:
        IOError: if the selected image cannot be read.
    """
    _pred = pred.copy()
    _gt = gt.copy()

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    # With no gt boxes nothing can match, and max() over an empty overlap
    # row would raise.
    match = 0
    if _gt.shape[0] > 0:
        overlaps = bbox_overlaps(_pred[:, :4], _gt)
        match = sum(1 for h in range(_pred.shape[0])
                    if overlaps[h].max() >= iou_thresh)

    if not (match == _match and gt.shape[0] > _least):
        return

    stem = name[0][0]
    folder = event_dir[stem.split("_")[0]]
    image_path = '../data/widerface/val/images/' + folder + "/" + stem + ".jpg"
    print(image_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising; fail here with the offending path instead of
        # an AttributeError further down.
        raise IOError('cannot read image: ' + image_path)

    _save_boxes_overlay(image, _gt, output + stem + "gt.jpg")
    _save_boxes_overlay(image, _pred, output + stem + "pred.jpg")
예제 #9
0
def neel_image_eval(pred, gt, finalHolder):
    """Collect the best gt overlap and the score of every detection.

    Boxes are converted on copies by adding columns 0/1 to columns 2/3 (an
    ``x, y, w, h`` layout is assumed) before ``bbox_overlaps`` is called.

    Args:
        pred: ``(P, 5)`` array; column 4 is copied to the output as the
            detection's score.
        gt: ``(G, 4)`` array of ground-truth boxes.
        finalHolder: list-like that receives one ``[best_overlap, score]``
            entry per detection.

    Returns:
        ``(pred_recall, proposal_list)`` of length ``P``. No matching is
        performed here, so the first is all zeros and the second all ones.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    num_pred = _pred.shape[0]

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    for h in range(num_pred):
        finalHolder.append([overlaps[h].max(), _pred[h][4]])

    # The recall bookkeeping is disabled in this variant, so the returned
    # arrays keep their initial values.
    return np.zeros(num_pred), np.ones(num_pred)
예제 #10
0
def image_eval(pred, gt, ignore, iou_thresh):
    """Single image evaluation.

    Args:
        pred: ``(P, 5)`` array. The first four columns are ``xmin, ymin, w,
            h``; the fifth column is not read here.
        gt: ``(G, 4)`` array laid out like the first four columns of
            ``pred``.
        ignore: indexable of length ``G``. An entry equal to 0 marks a gt
            box that is excluded: a detection matching it is flagged ``-1``
            and the gt never counts as recalled.
        iou_thresh: minimum overlap for a detection to match its best gt.

    Returns:
        ``(pred_recall, proposal_list)``, both of length ``P``.
        ``pred_recall[h]`` counts the distinct non-excluded gt boxes matched
        by detections ``0..h``; ``proposal_list[h]`` is ``-1`` when
        detection ``h`` matched an excluded gt and ``1`` otherwise.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    pred_count, gt_count = _pred.shape[0], _gt.shape[0]
    pred_recall = np.zeros(pred_count)
    proposal_list = np.ones(pred_count)

    # Nothing to match against; max()/argmax() over an empty overlap row
    # would raise, so return the untouched initial values.
    if gt_count == 0:
        return pred_recall, proposal_list

    recall_list = np.zeros(gt_count)

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]  # xmax = xmin + w
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]  # ymax = ymin + h
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    # recall_list entries only move 0 -> 1 (excluded ones move to -1 and
    # can never be 1), so the hits can be counted as they happen instead of
    # rescanning the array for every detection.
    hits = 0
    for h in range(pred_count):
        gt_overlap = overlaps[h]
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()

        if max_overlap >= iou_thresh:
            if ignore[max_idx] == 0:
                # Best gt is excluded: flag both the gt and the detection.
                recall_list[max_idx] = -1
                proposal_list[h] = -1
            elif recall_list[max_idx] == 0:
                # First detection to hit this gt.
                recall_list[max_idx] = 1
                hits += 1

        pred_recall[h] = hits
    return pred_recall, proposal_list
예제 #11
0
def anchor_target_layer(gt_boxes, all_anchors, image_shape, feature_map_shape, k):
    """Label anchors as foreground/background and compute their targets.

    :param gt_boxes: ground-truth boxes, one per row; passed unchanged to
        ``bbox.bbox_overlaps`` and ``bbox.bbox_transform``.
    :param all_anchors: ``(N, 4)`` anchors; columns 0-3 are compared with
        the image bounds as ``x1, y1, x2, y2``.
    :param image_shape: indexable whose item 0 bounds column 3 and whose
        item 1 bounds column 2 of the anchors.
    :param feature_map_shape: not used by this function.
    :param k: not used by this function.
    :return: ``(labels, targets)`` for all ``N`` anchors. Labels are 1
        (foreground), 0 (background) or -1 (not used); anchors outside the
        image get label -1 and zero targets. With no gt boxes, every label
        and every target is 0.
    """
    anchor_total = all_anchors.shape[0]

    # If there is no object in the image
    if len(gt_boxes) == 0:
        return (np.zeros((len(all_anchors),), dtype=np.int32),
                np.zeros(all_anchors.shape, dtype=np.float32))

    # Keep anchors that inside the image
    inside = ((all_anchors[:, 0] >= 0) &
              (all_anchors[:, 1] >= 0) &
              (all_anchors[:, 2] < image_shape[1]) &
              (all_anchors[:, 3] < image_shape[0]))
    valid_idx = np.where(inside)[0]
    anchors = all_anchors[valid_idx, :]
    valid_total = len(valid_idx)

    # Every kept anchor starts out as "not used".
    labels = np.zeros((valid_total,), dtype=np.int32) - 1

    overlaps = bbox.bbox_overlaps(anchors, gt_boxes)
    best_gt = np.argmax(overlaps, axis=1)
    best_iou = overlaps[np.arange(0, valid_total, 1), best_gt]
    best_anchor = np.argmax(overlaps, axis=0)
    peak_per_gt = overlaps[best_anchor, np.arange(0, gt_boxes.shape[0], 1)]
    # Rows of every anchor that reaches some gt box's best overlap.
    peak_rows = np.where(overlaps == peak_per_gt)[0]

    # Background first, so the two foreground rules below can override it.
    labels[best_iou < BG_LOW_THRES] = 0
    labels[peak_rows] = 1
    labels[best_iou > FG_HIGH_THRES] = 1

    targets = bbox.bbox_transform(anchors, gt_boxes[best_gt, :])

    # Sampling positive and negative anchors
    fg_budget = int(SAMPLE_NUMBER * FG_RATIO)
    fg_rows = np.where(labels == 1)[0]
    fg_excess = len(fg_rows) - fg_budget
    if fg_excess > 0:
        dropped = np.random.choice(fg_rows, size=fg_excess, replace=False)
        labels[dropped] = -1

    bg_budget = SAMPLE_NUMBER - np.sum(labels == 1)
    bg_rows = np.where(labels == 0)[0]
    if len(bg_rows) > bg_budget:
        dropped = np.random.choice(
            bg_rows, size=(len(bg_rows) - bg_budget), replace=False)
        labels[dropped] = -1

    # Scatter the results back onto the full anchor set.
    labels = _unmap(labels, anchor_total, valid_idx, -1)
    targets = _unmap(targets, anchor_total, valid_idx, 0)

    return labels, targets
예제 #12
0
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        im_info,
                        data,
                        _feat_stride=(16, ),
                        anchor_scales=(4, 8, 16, 32)):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets, together with the inside/outside weights of the box loss.

    Args:
        rpn_cls_score: array whose first dimension must be 1 and whose
            dimensions 1 and 2 are read as the feature-map height and
            width. Only its shape is used.
        gt_boxes: ground-truth boxes, passed unchanged to ``bbox_overlaps``
            and ``_compute_targets``. NOTE(review): the original notes
            describe 5 values per box - confirm against the caller.
        im_info: indexable whose first item is read as
            ``(height, width, scale)``.
        data: not used by this function.
        _feat_stride: factor that maps feature-map cells to the coordinates
            the anchors are expressed in.
        anchor_scales: scales handed to ``generate_anchors``.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. With ``A`` anchors per cell the labels
        have shape ``(1, 1, A * height, width)`` and the other three have
        shape ``(1, A * 4, height, width)``. Labels are 1 (positive), 0
        (negative) or -1 (don't care).
    """

    # Notes carried over from the original author (not verifiable from this
    # function alone):
    #   rpn_cls_score: [batch_size, w, h, 18], output of a convolution
    #   gt_boxes:      [batch_size, 5]
    #   im_info:       [batch_size, 3]
    #   data:          [batch_size, w, h, 3], the image

    # Base anchors for the requested scales.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]  # the number of anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate the anchor boxes centered on cell i
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # the height and width of the feature map
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height {} width {}'.format(height, width))
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate the shifted anchors for every feature-map cell
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]  # the number of cells in the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    # every feature-map cell now owns A anchors
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # Anchors reaching outside the image were discarded above. The remaining
    # ones are split into three groups:
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per kept anchor, one column per gt box.
    # np.ascontiguousarray yields arrays that are contiguous in memory.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Re-derive the anchor indices by value so that every anchor tying for
    # a gt box's best overlap is included.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    # (RPN_BATCHSIZE is the number of labelled anchors wanted per batch)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets of every kept anchor towards its best gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_outside_weights = bbox_outside_weights

    # rpn_labels: label of every anchor, 1 positive, 0 negative, -1 dont care
    # rpn_bbox_targets: output of _compute_targets for every anchor against
    #   its best gt box (zero for anchors outside the image)
    # rpn_bbox_inside_weights: non-zero only for positive anchors
    # rpn_bbox_outside_weights: per-anchor weights, non-zero for positive
    #   and negative anchors
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
예제 #13
0
def proposal(img, gt_bboxes, detector=None):
    """Sample negative, positive and part-face training crops from one image.

    With a ``detector`` (data for rNet / oNet) the candidate boxes come from
    ``detector.detect`` and are bucketed by their best overlap with
    ``gt_bboxes``.  Without one (data for pNet) candidates are sliding
    windows generated around every ground-truth box, and additional
    negatives are random square crops taken over the whole image.

    Args:
        img: image array; ``img.shape[:-1]`` is read as (height, width) and
            the array itself is handed to ``crop_face``.
        gt_bboxes: array with one ``[x1, y1, x2, y2]`` row per face.
        detector: optional ``JfdaDetector`` used to propose boxes.

    Returns:
        ``(negatives, positives, part)``.  ``negatives`` is a list of
        ``(data, bbox)``; ``positives`` and ``part`` are lists of
        ``(data, bbox, bbox_target)`` where ``bbox_target`` is
        ``(gt_bbox - bbox) / k`` and ``k`` is the box width (detector
        branch) or ``max(w, h) * scale`` of the face (pNet branch).
        Crops for which ``crop_face`` returns ``None`` are skipped.
    """
    # ======================= proposal for rnet and onet ==============
    if detector is not None:
        assert isinstance(detector, JfdaDetector)
        bboxes = detector.detect(img, **cfg.DETECT_PARAMS)
        # keep bbox info, drop score, offset and landmark
        bboxes = bboxes[:, :4]
        ovs = bbox_overlaps(bboxes, gt_bboxes)
        ovs_max = ovs.max(axis=1)
        ovs_idx = ovs.argmax(axis=1)
        pos_idx = np.where(ovs_max > cfg.FACE_OVERLAP)[0]
        neg_idx = np.where(ovs_max < cfg.NONFACE_OVERLAP)[0]
        part_idx = np.where(
            np.logical_and(ovs_max > cfg.PARTFACE_OVERLAP,
                           ovs_max <= cfg.FACE_OVERLAP))[0]

        positives = _detector_samples(img, bboxes, gt_bboxes, ovs_idx,
                                      pos_idx)
        part = _detector_samples(img, bboxes, gt_bboxes, ovs_idx, part_idx)

        negatives = []
        np.random.shuffle(neg_idx)
        for idx in neg_idx[:cfg.NEG_DETECT_PER_IMAGE]:
            bbox = bboxes[idx].reshape(4)
            data = crop_face(img, bbox)
            if data is None:
                continue
            negatives.append((data, bbox))
        return negatives, positives, part

    # ======================= proposal for pnet =======================
    height, width = img.shape[:-1]

    def overlap_with_face(candidates, gt_bbox):
        # IoU of every candidate with this single face, flattened to 1-D
        ovs = bbox_overlaps(candidates, gt_bbox.reshape((1, 4)))
        return ovs.reshape((1, len(candidates)))[0]

    def is_positive(candidates, gt_bbox):
        return overlap_with_face(candidates, gt_bbox) > cfg.FACE_OVERLAP

    def is_part(candidates, gt_bbox):
        ovs = overlap_with_face(candidates, gt_bbox)
        return np.logical_and(ovs > cfg.PARTFACE_OVERLAP,
                              ovs <= cfg.FACE_OVERLAP)

    def is_negative(candidates, gt_bbox):
        # a negative must stay clear of *every* face, not only this one
        ovs = bbox_overlaps(candidates, gt_bboxes)
        return ovs.max(axis=1) < cfg.NONFACE_OVERLAP

    # The three phases run one after another (not interleaved per face) so
    # the sequence of random draws is the same as in the unrolled version.
    positives = _window_samples(img, gt_bboxes, cfg.POS_PROPOSAL_SCALES,
                                cfg.POS_PROPOSAL_STRIDE, is_positive,
                                cfg.POS_PER_FACE, True)
    # Part faces are capped with PART_PER_FACE; the previous code capped the
    # per-face total with POS_PER_FACE, which looks like a copy-paste slip.
    part = _window_samples(img, gt_bboxes, cfg.PART_PROPOSAL_SCALES,
                           cfg.PART_PROPOSAL_STRIDE, is_part,
                           cfg.PART_PER_FACE, True)
    negatives = _window_samples(img, gt_bboxes, cfg.NEG_PROPOSAL_SCALES,
                                cfg.NEG_PROPOSAL_STRIDE, is_negative,
                                cfg.NEG_PER_FACE, False)

    # negatives from global image random crop
    max_num_from_fr = int(cfg.NEG_PER_IMAGE * cfg.NEG_FROM_FR_RATIO)
    if len(negatives) > max_num_from_fr:
        random.shuffle(negatives)
        negatives = negatives[:max_num_from_fr]
    bbox_neg = []
    range_x, range_y = width - cfg.NEG_MIN_SIZE, height - cfg.NEG_MIN_SIZE
    for _ in range(cfg.NEG_PROPOSAL_RATIO * cfg.NEG_PER_IMAGE):
        x1, y1 = np.random.randint(range_x), np.random.randint(range_y)
        # randint's upper bound is exclusive, so the square always ends
        # strictly inside the image (x2 < width and y2 < height).
        side = np.random.randint(low=cfg.NEG_MIN_SIZE,
                                 high=min(width - x1, height - y1))
        bbox_neg.append([x1, y1, x1 + side, y1 + side])
    bbox_neg = np.asarray(bbox_neg, dtype=gt_bboxes.dtype)
    ovs = bbox_overlaps(bbox_neg, gt_bboxes)
    bbox_neg = bbox_neg[ovs.max(axis=1) < cfg.NONFACE_OVERLAP]
    np.random.shuffle(bbox_neg)
    if not cfg.NEG_FORCE_BALANCE:
        remain = cfg.NEG_PER_IMAGE - len(negatives)
    else:
        # balance ratio from face region and global crop
        remain = int(
            len(negatives) * (1. - cfg.NEG_FROM_FR_RATIO) /
            cfg.NEG_FROM_FR_RATIO)
    # a negative count would make the slice drop rows from the end instead
    # of selecting nothing
    bbox_neg = bbox_neg[:max(remain, 0)]

    for bbox in bbox_neg:
        data = crop_face(img, bbox)
        # same guard as every other crop_face call site
        if data is None:
            continue
        negatives.append((data, bbox))
    return negatives, positives, part


def _detector_samples(img, bboxes, gt_bboxes, gt_idx, indices):
    """Crop the detector boxes at ``indices`` and attach regression targets."""
    samples = []
    for idx in indices:
        bbox = bboxes[idx].reshape(4)
        gt_bbox = gt_bboxes[gt_idx[idx]]
        data = crop_face(img, bbox)
        if data is None:
            continue
        # targets are normalised by the width of the proposed box
        k = bbox[2] - bbox[0]
        samples.append((data, bbox, (gt_bbox - bbox) / k))
    return samples


def _window_samples(img, gt_bboxes, scales, stride, select, per_face,
                    with_target):
    """Collect sliding-window crops around every ground-truth box.

    ``select(candidates, gt_bbox)`` returns the boolean mask of windows to
    keep.  At most ``per_face`` windows are cropped per scale and at most
    ``per_face`` samples are kept per face.  ``with_target`` chooses between
    ``(data, bbox, bbox_target)`` and ``(data, bbox)`` tuples.
    """
    samples = []
    for gt_bbox in gt_bboxes:
        x, y = gt_bbox[:2]
        w, h = gt_bbox[2] - gt_bbox[0], gt_bbox[3] - gt_bbox[1]
        this_face = []
        for scale in scales:
            k = max(w, h) * scale
            s = k * stride
            # random margin around the face; x is drawn before y
            offset_x = (0.5 + np.random.rand()) * k / 2.
            offset_y = (0.5 + np.random.rand()) * k / 2.
            # presumably k is the window size and s the step in both
            # directions -- confirm against sliding_windows
            candidates = sliding_windows(x - offset_x, y - offset_y,
                                         w + 2 * offset_x, h + 2 * offset_y,
                                         k, k, s, s)
            chosen = candidates[select(candidates, gt_bbox), :]
            if len(chosen) > 0:
                np.random.shuffle(chosen)
            for bbox in chosen[:per_face]:
                data = crop_face(img, bbox)
                if data is None:
                    continue
                if with_target:
                    this_face.append((data, bbox, (gt_bbox - bbox) / k))
                else:
                    this_face.append((data, bbox))
        random.shuffle(this_face)
        samples.extend(this_face[:per_face])
    return samples
예제 #14
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional sequence indexed by image number that
                yields the proposal boxes of that image.  When ``None`` the
                roidb boxes whose ``gt_classes`` entry is 0 are used.
            thresholds: optional array of IoU thresholds; defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate; one of
                'all', 'small', 'medium', 'large', '96-128', '128-256',
                '256-512', '512-inf'.
            limit: optional maximum number of proposals per image; only the
                first ``limit`` rows are kept.

        Returns:
            results: dictionary of results with keys
                'ar': average recall (mean of ``recalls``)
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': sorted vector of all ground-truth overlaps
        """
        # Inclusive [min, max] ground-truth areas for every supported name
        area_ranges = {
            'all': [0**2, 1e5**2],
            'small': [0**2, 32**2],
            'medium': [32**2, 96**2],
            'large': [96**2, 1e5**2],
            '96-128': [96**2, 128**2],
            '128-256': [128**2, 256**2],
            '256-512': [256**2, 512**2],
            '512-inf': [512**2, 1e5**2],
        }
        assert area in area_ranges, 'unknown area range: {}'.format(area)
        area_range = area_ranges[area]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            # counted before the empty-proposal shortcut below, so gt boxes of
            # images without proposals still count as missed
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float was removed in NumPy 1.24; float64 is what it aliased
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Greedy one-to-one matching.  Each round consumes one proposal
            # and one gt box, so at most min(#proposals, #gt) rounds are
            # possible; gt boxes left unmatched keep an overlap of 0.
            # Iterating over every gt box would hit the assertion below once
            # all proposals have been consumed.
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
예제 #15
0
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    The list defaults are only read, never mutated.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer; only
        its shape is used here
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales passed to generate_anchors
    ----------
    Returns
    ----------
    rpn_labels: (1, H, W, A), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression targets produced by
        _compute_targets from each anchor and its best-overlapping gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), cfg.RPN_BBOX_INSIDE_WEIGHTS for
        fg anchors and 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), per-anchor weights used to
        balance fg against bg; 0 for dontcare anchors
    """
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # base anchors, one row per anchor
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]  # [height, width, scale] of the input image
    if DEBUG:
        print("im_info: ", im_info)
    # Algorithm:
    # for each (H, W) location i
    #   generate A anchor boxes shifted to cell i
    # filter out-of-image anchors
    # measure GT overlap and derive labels / regression targets

    assert rpn_cls_score.shape[
        0] == 1, 'Only single item batches are supported'

    # feature-map height and width
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate shifted anchors for every feature-map cell
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W; each row is the (x, y, x, y) image-space offset of one cell
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
         shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per inside anchor, one column per gt box
    # np.float was removed in NumPy 1.24; float64 is what it aliased
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: index and value of its best-overlapping gt box
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: index and value of its best-overlapping anchor
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # widen to every anchor that ties for the best overlap of some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.RPN_POSITIVE_OVERLAP] = 1

    if cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.RPN_FG_FRACTION * cfg.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1  # surplus positives become dontcare

    # subsample negative labels if we have too many; negatives fill whatever
    # part of the batch the positives left free
    num_bg = cfg.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # --------------------------------------------------------------
    # regression targets between every inside anchor and its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: cfg value for fg anchors, 0 for everything else
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting: fg anchors get 1, bg anchors get 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.RPN_POSITIVE_WEIGHT > 0) & (cfg.RPN_POSITIVE_WEIGHT < 1))
        # The +1 belongs in the denominator: it guards against an empty
        # class.  Outside the parentheses it produced "weight / count + 1",
        # i.e. weights of about 1 and a division by zero for empty classes.
        positive_weights = (cfg.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors: the out-of-image anchors dropped
    # earlier come back as dontcare (-1) labels with zero targets and weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape everything back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    if DEBUG:
        print("anchor target set")
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
예제 #16
0
    def label_anchors(self,
                      anchors,
                      gt_anchors,
                      pos_threshold=0.7,
                      neg_threshold=0.5):
        """Label each anchor (text or non-text).

        Labels are assigned in this order, later steps overwriting earlier
        ones: negatives (best IoU < ``neg_threshold``), then the best anchor
        of every ground-truth box, then every anchor whose IoU with any
        ground-truth box is > ``pos_threshold``.

        Args:
            anchors: A numpy array with shape [num_anchors, 4] contains the
                coordinates of each anchor.
            gt_anchors: A numpy array with shape [num_gt_anchors, 4] contains
                the coordinates of each ground-truth anchor.
            pos_threshold: A IoU threshold for determining an anchor is
                positive.
            neg_threshold: A IoU threshold for determining an anchor is
                negative.

        Returns:
            cls_anchors: An integer numpy array with shape [num_anchors]
                holding 1 (positive), 0 (negative) or -1 (unlabeled) for
                each anchor.
            pos_anchors: A tuple ``(anchor_ids, gt_ids)`` of two index
                arrays of equal length; pair ``i`` says that anchor
                ``anchor_ids[i]`` is positive for ground-truth box
                ``gt_ids[i]``.
        """
        # Array containing the label for each anchor, -1 until decided.
        # np.int / np.float were removed in NumPy 1.24; the builtin int and
        # np.float64 are what they aliased.
        cls_anchors = np.full(anchors.shape[0], -1, dtype=int)

        # Calculate the IoU between the anchors and the ground truth anchors
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_anchors, dtype=np.float64))

        # Labeling anchors
        # i. Negative anchors (best IoU below neg_threshold)
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(anchors.shape[0]), argmax_overlaps]
        cls_anchors[max_overlaps < neg_threshold] = 0

        # ii. The anchors with the highest IoU overlap with GT boxes.
        best_anchor_ids = overlaps.argmax(axis=0)
        cls_anchors[best_anchor_ids] = 1
        # row 0: anchor index, row 1: the gt box it is the best anchor for
        best_pairs = np.array(
            [best_anchor_ids,
             np.arange(len(best_anchor_ids))])

        # iii. Anchors that have > threshold IoU overlap with any GT box
        valid_pairs = np.where(overlaps > pos_threshold)
        cls_anchors[valid_pairs[0]] = 1

        # Best anchors that did not already pass the threshold test still
        # need an (anchor, gt) pair in the output.
        already_listed = np.in1d(best_pairs[0], valid_pairs[0])
        new_anchors_id = np.where(~already_listed)[0]
        if len(new_anchors_id) > 0:
            pos_anchors = (np.append(valid_pairs[0],
                                     best_pairs[0][new_anchors_id]),
                           np.append(valid_pairs[1],
                                     best_pairs[1][new_anchors_id]))
        else:
            pos_anchors = valid_pairs

        return cls_anchors, pos_anchors