コード例 #1
0
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride = [16,], anchor_scales = [16,]):
    """
    Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets for training the region proposal network.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer;
        only its shape is read here (batch size must be 1)
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: per-gt difficulty flags, 1 = hard; first dimension must equal G.
        NOTE(review): documented upstream as (G, 1), but the boolean row
        indexing below needs a 1-D mask -- confirm against the caller.
    dontcare_areas: (D, 4), areas that may contain small unlabeled objects.
        May be None or have D == 0.
    im_info: indexable whose first element is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (never mutated here)
    anchor_scales: the scales applied to the basic anchor (never mutated here)

    Returns
    -------
    rpn_labels: (1, H, W, A), for each anchor 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression objectives computed by
        _compute_targets between each anchor and its best-matching gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS
        for fg anchors, 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), used to balance fg/bg because
        their counts may differ significantly
    """
    # Base anchors; reshaped as (A, 4) below.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Running statistics, only used by the DEBUG sections below.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # Unwrap to [image_height, image_width, scale].
    im_info = im_info[0]

    # Algorithm:
    #   for each (H, W) location generate A anchors shifted to that cell
    #   filter out-of-image anchors
    #   measure GT overlap and label the anchors
    #   compute regression targets and loss weights

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # Feature-map height and width.
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Shift the base anchors to every feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # One (x, y, x, y) image-space offset per cell; K is H x W.
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # Only keep anchors that lie fully inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # Anchors are ready; now label them.
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps[i, j] is the overlap between inside-anchor i and gt box j.
    # np.float64 replaces the np.float alias, which newer NumPy removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For each anchor: index of, and overlap with, its best gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: the best overlap achieved by any anchor ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor that ties for it. A gt box that overlaps no
    # anchor at all is skipped: without the `> 0` guard every anchor with
    # zero overlap to that box would be marked as foreground.
    gt_argmax_overlaps = np.where(
        (overlaps == gt_max_overlaps) & (gt_max_overlaps > 0))[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor(s) with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # summed over areas, per anchor
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # rows: hard gt boxes, columns: anchors
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            # Ignore anchors that match a hard box well ...
            hard_max_overlaps = hard_overlaps.max(axis=0)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            # ... and the best anchor of every hard box.
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)
            labels[max_intersec_label_inds] = -1

    # Subsample positive labels if we have too many: keep at most
    # RPN_FG_FRACTION * RPN_BATCHSIZE of them.
    # TODO: may need revisiting -- with character-fragment ground truth the
    # number of positives is large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many: negatives fill whatever
    # part of RPN_BATCHSIZE the positives left free.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Labels are final; compute the regression targets.
    # --------------------------------------------------------------
    # Every anchor is paired with the gt box it overlaps most.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Inside weights: configured value for fg anchors, 0 for everything else.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting: positives weigh 1, negatives weigh 0.
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The +1 belongs in the denominator: it keeps the division finite
        # when a class has no samples. Previously it was added to the
        # quotient because of operator precedence.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # Map back to the full anchor set: anchors dropped for leaving the image
    # get label -1 (dontcare) and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # Reshape everything to the feature-map layout.
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
コード例 #2
0
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets for training the region proposal network.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer;
        only its shape is read here (batch size must be 1)
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: per-gt difficulty flags, 1 = hard; first dimension must equal G.
        NOTE(review): documented upstream as (G, 1), but the boolean row
        indexing below needs a 1-D mask -- confirm against the caller.
    dontcare_areas: (D, 4), areas that may contain small unlabeled objects.
        May be None or have D == 0.
    im_info: indexable whose first element is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (never mutated here)
    anchor_scales: the scales applied to the basic anchor (never mutated here)

    Returns
    -------
    rpn_labels: (1, H, W, A), for each anchor 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression objectives computed by
        _compute_targets between each anchor and its best-matching gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS
        for fg anchors, 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), used to balance fg/bg because
        their counts may differ significantly
    """
    # Base anchors; reshaped as (A, 4) below.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        # Running statistics, only used by the DEBUG sections below.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # Unwrap to [image_height, image_width, scale].
    im_info = im_info[0]

    # Algorithm:
    #   for each (H, W) location generate A anchors shifted to that cell
    #   filter out-of-image anchors
    #   measure GT overlap and label the anchors
    #   compute regression targets and loss weights

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # Feature-map height and width.
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Shift the base anchors to every feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    # in W H order; both results are (H, W)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # One (x, y, x, y) image-space offset per cell: (H*W, 4); K is H x W.
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()
         )).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # Only keep anchors that lie fully inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    anchors = all_anchors[inds_inside, :]  # (num_inside, 4)
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # Anchors are ready; now label them.
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps[i, j] is the overlap between inside-anchor i and gt box j.
    # np.float64 replaces the np.float alias, which newer NumPy removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For each anchor: index of, and overlap with, its best gt box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: index of the (first) anchor that overlaps it most.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    if DEBUG:
        print('获取所有anchor中与gt相交最大的哪几个anchor的索引')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)
    # Best overlap value achieved for each gt box, shape (G,).
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Every anchor that ties for the best overlap of some gt box. A gt box
    # that overlaps no anchor at all is skipped: without the `> 0` guard
    # every anchor with zero overlap to that box would be marked foreground.
    gt_argmax_overlaps = np.where(
        (overlaps == gt_max_overlaps) & (gt_max_overlaps > 0))[0]
    if DEBUG:
        print('这一步是找到那些同样与gt有最大overlap的索引,上一步找到的4个,这一步找到其他重复的')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor(s) with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    if DEBUG:
        print('在过滤数量之前:')
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # summed over areas, per anchor
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if (cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None
            and gt_ishard.shape[0] > 0):
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # rows: hard gt boxes, columns: anchors
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            # Ignore anchors that match a hard box well ...
            hard_max_overlaps = hard_overlaps.max(axis=0)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            # ... and the best anchor of every hard box.
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)
            labels[max_intersec_label_inds] = -1

    # Subsample positive labels if we have too many: keep at most
    # RPN_FG_FRACTION * RPN_BATCHSIZE of them.
    # TODO: may need revisiting -- with character-fragment ground truth the
    # number of positives is large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many: negatives fill whatever
    # part of RPN_BATCHSIZE the positives left free.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    if DEBUG:
        print("考虑均衡住正负样本以后:")
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # Labels are final; compute the regression targets.
    # --------------------------------------------------------------
    # Every anchor is paired with the gt box it overlaps most.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Inside weights: configured value for fg anchors, 0 for everything else.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting: positives weigh 1, negatives weigh 0.
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The +1 belongs in the denominator: it keeps the division finite
        # when a class has no samples. Previously it was added to the
        # quotient because of operator precedence.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # Map back to the full anchor set: anchors dropped for leaving the image
    # get label -1 (dontcare) and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # Reshape everything to the feature-map layout.
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
コード例 #3
0
def anchor_target_layer(rpn_cls_score,
                        rpn_cls_prob,
                        im_name,
                        gt_boxes_large,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    将gt_box划分为细框
    实现论文中的side-refinement
    arameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    :return:
    """
    global img_name
    if img_name != im_name:  # 第一次训练这个图片
        flag_first = True
    else:
        flag_first = False
    img_name = im_name

    if flag_first:  # 如果是第一次见到这个图片,就要重新生成所有tmp对象
        gt_boxes = split_frame(gt_boxes_large)
        # gt_width = gt_boxes[:,2]-gt_boxes[:,0]
        _anchors = generate_anchors(
            scales=np.array(anchor_scales))  # 生成基本的anchor,一共9个
        _num_anchors = _anchors.shape[0]  # 9个anchor

        if DEBUG:
            print('anchors:')
            print(_anchors)
            print('anchor shapes:')
            print(
                np.hstack((
                    _anchors[:, 2::4] - _anchors[:, 0::4],
                    _anchors[:, 3::4] - _anchors[:, 1::4],
                )))
            _counts = cfg.EPS
            _sums = np.zeros((1, 4))
            _squared_sums = np.zeros((1, 4))
            _fg_sum = 0
            _bg_sum = 0
            _count = 0

        # allow boxes to sit over the edge by a small amount
        _allowed_border = 0

        im_info = im_info[0]  # 图像的高宽及通道数

        assert rpn_cls_score.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = rpn_cls_score.shape[1:3]  # feature-map的高宽

        if DEBUG:
            print('AnchorTargetLayer: height', height, 'width', width)
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape', gt_boxes.shape)

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * _feat_stride  # (W)
        shift_y = np.arange(0, height) * _feat_stride  # (H)
        shift_x, shift_y = np.meshgrid(
            shift_x,
            shift_y)  # in W H order   # shift_x (H, W)  shift_y (H, W)

        # K is H x W
        shifts = np.vstack(
            (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
             shift_y.ravel()
             )).transpose()  # 生成feature-map和真实image上anchor之间的偏移量     #(H*W, 4)
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors  # 9个anchor
        K = shifts.shape[0]  # 50*37,feature-map的宽乘高的大小
        all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))  # 相当于复制宽高的维度,然后相加
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        # 仅保留那些还在图像内部的anchor,超出图像的都删掉
        inds_inside = np.where(
            (all_anchors[:, 0] >= -_allowed_border)
            & (all_anchors[:, 1] >= -_allowed_border)
            & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
        )[0]

        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]  # 保留那些在图像内的anchor   (In, 4)
        if DEBUG:
            print('anchors.shape', anchors.shape)

        # 至此,anchor准备好了
        # --------------------------------------------------------------
        # label: 1 is positive, 0 is negative, -1 is dont care
        # (A)
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)  # 初始化label,均为-1

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt), shape is A x G
        # 计算anchor和gt-box的overlap,用来给anchor上标签
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(
                gt_boxes,
                dtype=np.float))  # 假设anchors有x个,gt_boxes有y个,返回的是一个(x,y)的数组
        # 存放每一个anchor和每一个gtbox之间的overlap
        argmax_overlaps = overlaps.argmax(
            axis=1)  # (A)#找到和每一个anchor,overlap最大的那个gt
        max_overlaps = overlaps[np.arange(
            len(inds_inside)
        ), argmax_overlaps]  # 假如在内部的anchor有900个 ,(900,), 表示的是每一个anchor最大的overlaps值
        gt_argmax_overlaps = overlaps.argmax(
            axis=0)  # G#找到所有anchor中与gtbox,overlap最大的那个anchor  # (3)

        gt_max_overlaps = overlaps[
            gt_argmax_overlaps,
            np.arange(
                overlaps.shape[1]
            )]  # 比如有3个gt 那么就得到(3,),表示的是上一步找到的与gt的overlap最大的3个anchor的overlap值
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[
            0]  # (3, ) 表示的是哪几个与gt有最大overlap的anchor的索引

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps <
                   cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # 先给背景上标签,小于0.3overlap的

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1  # 每个位置上的9个anchor中overlap最大的认为是前景
        # fg label: above threshold IOU
        labels[max_overlaps >=
               cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # overlap大于0.7的认为是前景

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        before_filter_labels = labels.copy()  # 过滤之前的标签,方便用来计算hard negtive
        all_bg_index = before_filter_labels == 0
        if DEBUG:
            print('在过滤数量之前:')
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))

        # preclude dontcare areas
        if dontcare_areas is not None and dontcare_areas.shape[
                0] > 0:  # 这里我们暂时不考虑有doncare_area的存在
            # intersec shape is D x A
            intersecs = bbox_intersections(
                np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
                np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
            )
            intersecs_ = intersecs.sum(axis=0)  # A x 1
            labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

        # 这里我们暂时不考虑难样本的问题
        # preclude hard samples that are highly occlusioned, truncated or difficult to see
        if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[
                0] > 0 and 0:
            assert gt_ishard.shape[0] == gt_boxes.shape[0]
            gt_ishard = gt_ishard.astype(int)
            gt_hardboxes = gt_boxes[gt_ishard == 1, :]
            if gt_hardboxes.shape[0] > 0:
                # H x A
                hard_overlaps = bbox_overlaps(
                    np.ascontiguousarray(gt_hardboxes,
                                         dtype=np.float),  # H x 4
                    np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
                hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
                labels[
                    hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
                max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
                labels[max_intersec_label_inds] = -1  #

        # subsample positive labels if we have too many
        # 对正样本进行采样,如果正样本的数量太多的话
        # 限制正样本的数量不超过128个
        # TODO 这个后期可能还需要修改,毕竟如果使用的是字符的片段,那个正样本的数量是很多的。
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)  # 随机去除掉一些正样本
            labels[disable_inds] = -1  # 变为-1

        # subsample negative labels if we have too many
        # 对负样本进行采样,如果负样本的数量太多的话
        # 正负样本总数是512,限制正样本数目最多128,
        # 如果正样本数量小于128,差的那些就用负样本补上,凑齐256个样本
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
            # print "was %s inds, disabling %s, now %s inds" % (
            # len(bg_inds), len(disable_inds), np.sum(labels == 0))

        if DEBUG:
            print("考虑均衡住正负样本以后:")
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))
        # 至此,第一次生成好了这个图片的labels,随机抽了512个
        # 生成其他部分的标签
        v_target, o_target = _compute_targets(anchors, gt_boxes[
            argmax_overlaps, :])  # 根据anchor和gtbox计算得真值(anchor和gtbox之间的偏差)

        # 但是计算损失函数的时候,其实是需要j索引和k索引,所以计算好这两个索引,一并返回,帮助计算损失函数
        # j索引,有效索引:正锚点或者与gt的overlap大于0.5以上的锚点的索引
        # 正锚点
        positive_index = np.where(labels == 1)[0]  # 应该是一个(p,)p应该不大于128

        #
        # ignore_index = np.where(labels==-1)[0]  # 应该是一个(n,)n应该很大,因为忽略的anchor很多
        keep_index = np.where(labels != -1)[0]
        _ = np.where(
            max_overlaps > 0.5)[0]  # 应该是一个(c,),表示overlap大于0.5的anchor的索引

        remove_ignore = list()
        for i in range(_.shape[0]):
            if i in keep_index:
                remove_ignore.append(_[i])
        remove_ignore = np.array(remove_ignore)
        effect_index = np.append(positive_index, remove_ignore)

        remove_repeat = np.array(list(set(list(effect_index))))

        j_index = remove_repeat.astype(np.int32)

        j_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
        j_index1[j_index] = 1

        # k 索引 , 边缘索引

        # 先找到所有的可以认为是边缘的gt框,这里简单的认为是边缘框和左右各自一个。
        # ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False)
        ori_gt_box = gt_boxes.astype(np.float32, copy=False)
        # 找到左右边界框,矩阵操作实现  todo
        list_left_index = list()
        list_right_index = list()
        for i in range(ori_gt_box.shape[0]):
            if ori_gt_box[i][2] - ori_gt_box[i][0] != 15:
                list_left_index.append(i)
            else:
                continue
        list_index1 = list_left_index + list_right_index
        # 去除不属于gt中的索引和重复的索引
        list_index2 = list(set(list_index1))
        list_index3 = sorted(list_index2)
        list_index4 = list()
        for index in list_index3:
            if index in range(ori_gt_box.shape[0]):
                list_index4.append(index)

        gt_side_index = np.array(list_index4).astype(np.int32)  # 得到了边界gt框的索引

        # 要得到与这些gt框有最大的overlap的anchors的索引,这些anchor是我们关心的
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        anchor_side_index = gt_argmax_overlaps[
            gt_side_index]  # 得到143个与gt具有最大的overlaps的anchor的索引
        # 还要去掉与边界框overlap为0的anchor,因为这些anhcor不是真的我们关心的anchor,如果不去除,还会造成o_loss异常大
        anchor_side_list = list()
        for i in range(anchor_side_index.shape[0]):
            anchor_index = anchor_side_index[i]
            gt_index = gt_side_index[i]
            overlap = overlaps[anchor_index, gt_index]
            if overlap > 0:
                anchor_side_list.append(anchor_index)
        anchor_side_index = np.array(anchor_side_list, dtype=np.int32)

        anchor_side_index1 = np.array(
            sorted(list(set(list(anchor_side_index))))).astype(np.int32)
        k_index = anchor_side_index1  # (s,) s个边界索引,但是并不是包括之前去除的超过边界框的索引值,所以需要之后的操作

        k_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
        k_index1[k_index] = 1

        in_labels = labels.copy()
        # map up to original set of anchors
        # 一开始是将超出图像范围的anchor直接丢掉的,现在在加回来
        labels = _unmap(labels, total_anchors, inds_inside,
                        fill=-1)  # 这些anchor的label是-1,也即dontcare
        v_target = _unmap(v_target, total_anchors, inds_inside,
                          fill=0)  # 这些anchor的真值是0,也即没有值
        o_target = _unmap(o_target, total_anchors, inds_inside, fill=0)
        j_index2 = _unmap(j_index1, total_anchors, inds_inside,
                          fill=0).astype(np.int32)
        k_index2 = _unmap(k_index1, total_anchors, inds_inside,
                          fill=0).astype(np.int32)

        # real_j_index = np.where(j_index2==1)[0]
        # real_k_index = np.where(k_index2==1)[0]

        global tmp_labels, tmp_all_bg_index, tmp_v_target, tmp_o_target, tmp_j_index2, tmp_k_index2, tmp_inds_inside
        tmp_labels = in_labels
        tmp_all_bg_index = all_bg_index
        tmp_v_target = v_target
        tmp_o_target = o_target
        tmp_j_index2 = j_index2
        tmp_k_index2 = k_index2
        tmp_inds_inside = inds_inside

        if DEBUG or SHOW_SOME:
            print('第一次这张图')
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))
            # print('保存的tmp_labels')
            # print('正样本:' + str(len(np.where(tmp_labels == 1)[0])))
            # print('负样本:' + str(len(np.where(tmp_labels == 0)[0])))
            # print('忽略样本:' + str(len(np.where(tmp_labels == -1)[0])))
        return labels, v_target, o_target, j_index2, k_index2

    else:  # 第二次见过这个图,只用生成hard neg添加进去
        if DEBUG and SHOW_SOME:
            print('不是第一次')
        # 先找出负样本
        bg_index = tmp_all_bg_index
        inds_inside = tmp_inds_inside
        # 找出得分高于某个阈值的
        rpn_cls_prob = np.reshape(rpn_cls_prob, [-1, 2])
        rpn_cls_prob = rpn_cls_prob[inds_inside, :]
        fg_score = rpn_cls_prob[:, 1]
        high_score = fg_score > 0.5

        # 找出即是负样本,又是分数很高的样本
        assert bg_index.shape == high_score.shape
        # 得到了hard negtive的索引,这个是
        hard_neg = bg_index * high_score

        if DEBUG:
            print('负样本的数量:' + str(len(np.where(bg_index == True)[0])))
            print('得分高于0.5的数量:' + str(len(np.where(high_score == True)[0])))
            print('hard negtive 数量' + str(len(np.where(hard_neg == True)[0])))

        # 如果是第二次训练这张图片,训练样本是第一次随机生成的正负样本加这一次的hard negtive
        labels = tmp_labels.copy()
        first_gen_index = labels != -1
        hard_neg_index = hard_neg
        assert first_gen_index.shape == hard_neg_index.shape, 'line 282'
        diff = hard_neg_index * 1 - first_gen_index * 1
        new_hard_index = diff == 1
        assert labels.shape == new_hard_index.shape

        if DEBUG:
            print('加载进来的第一次的labels的负样本的数量:' +
                  str(len(np.where(labels == 0)[0])))
            print('属于难负样本不属于第一次样本的数量:' +
                  str(len(np.where(new_hard_index == True)[0])))
            print('加难样本之前的负样本数量:' + str(len(np.where(labels == 0)[0])))

        labels[new_hard_index] = 0

        if DEBUG or SHOW_SOME:
            print('加难样本之后的负样本数量:' + str(len(np.where(labels == 0)[0])))
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))
            # print('第一次保存的tmp_labels的负样本数量应该不变的'+ str(len(np.where(tmp_labels == 0)[0])))

        # 至此,准备好了labels
        # 其他标签不变,加载上次保存的就好
        v_target = tmp_v_target
        o_target = tmp_o_target
        j_index2 = tmp_j_index2
        k_index2 = tmp_k_index2

        return labels, v_target, o_target, j_index2, k_index2