# Example no. 1
# 0
def proposal_layer_3d(rpn_cls_prob_reshape,
                      rpn_bbox_pred,
                      im_info,
                      calib,
                      cfg_key,
                      _feat_stride=[
                          8,
                      ],
                      anchor_scales=[1.0, 1.0],
                      img_size=(375, 1242)):
    """Turn RPN outputs into bird-view, image and 3D proposal blobs.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted bird-view boxes to the bird-view image
    remove predicted boxes with either height or width < threshold
    filter the projected image boxes with ``_filter_img_boxes``
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS with the configured threshold to remaining proposals
    take post_nms_topN proposals after NMS

    Args:
        rpn_cls_prob_reshape: array whose first dimension must be 1 and
            whose ``shape[1:3]`` is (H, W). It is reshaped to
            (1, H, W, A, 2); index 1 of the last axis is used as the
            foreground score.
        rpn_bbox_pred: predicted deltas, reshaped to rows of 6 values.
        im_info: only ``im_info[0]`` is used; its first two entries are
            passed to ``clip_boxes`` and its third entry scales
            ``RPN_MIN_SIZE``.
        calib: rows 3, 2 and 0 are handed to ``lidar_cnr_to_img``
            (presumably KITTI-style calibration matrices -- TODO confirm).
        cfg_key: key into the global ``cfg`` (e.g. 'TRAIN' or 'TEST').
        _feat_stride: feature stride; a scalar, ``[s]`` or ``[sx, sy]``.
        anchor_scales: unused; kept so existing callers keep working.
        img_size: forwarded to ``_filter_img_boxes``; the default
            reproduces the previously hard-coded ``[375, 1242]``
            (presumably camera image (height, width) -- TODO confirm).

    Returns:
        Tuple ``(blob_bv, blob_img, blob_3d)`` of float32 arrays; each is
        the surviving proposals with a leading all-zero batch-index column.
    """
    _anchors = generate_anchors_bv()
    _num_anchors = _anchors.shape[0]

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    layer_cfg = cfg[cfg_key]
    pre_nms_topN = layer_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = layer_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = layer_cfg.RPN_NMS_THRESH
    min_size = layer_cfg.RPN_MIN_SIZE

    # Per anchor the last axis holds two probabilities; index 1 is the
    # foreground one, which is what we rank proposals by.
    height, width = rpn_cls_prob_reshape.shape[1:3]
    scores = np.reshape(rpn_cls_prob_reshape,
                        [1, height, width, _num_anchors, 2])[..., 1]

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors

    # Enumerate all shifts. Normalising the stride lets callers pass a
    # scalar, a one-element list (the default) or an [x, y] pair like the
    # sibling layers use.
    stride = np.atleast_1d(_feat_stride)
    shift_x = np.arange(0, width) * stride[0]
    shift_y = np.arange(0, height) * stride[-1]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # One row of 6 regression values and one score per anchor; the rows are
    # expected to be in the same order as the anchors built above.
    bbox_deltas = bbox_deltas.reshape((-1, 6))
    scores = scores.reshape((-1, 1))

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])

    if DEBUG:
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))
        print('proposals_img shape: {}'.format(proposals_img.shape))

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # Filter on the projected image boxes; the image size is now a
    # parameter instead of a hard-coded constant.
    keep = _filter_img_boxes(proposals_img, list(img_size))
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print('proposals after clip')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))
        print('proposals_img shape: {}'.format(proposals_img.shape))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print('proposals after nms')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack((batch_inds, proposals_bv.astype(np.float32,
                                                         copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack((batch_inds, proposals_3d.astype(np.float32,
                                                         copy=False)))

    if DEBUG:
        print('blob shape ====================:')
        print(blob_bv.shape)
        print(blob_img.shape)

    return blob_bv, blob_img, blob_3d
# Example no. 2
# 0
def proposal_layer_3d(rpn_cls_prob_reshape,
                      rpn_bbox_pred,
                      im_info,
                      gt_bv,
                      cfg_key,
                      _feat_stride=[8, 8]):
    """Turn RPN outputs into labelled bird-view and 3D proposals.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
    keep only the anchors selected by ``_filter_anchors``
    apply the predicted deltas to the remaining anchors
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS with the configured threshold to remaining proposals
    take post_nms_topN proposals after NMS
    label the survivors against ``gt_bv`` with ``valid_pred``

    Args:
        rpn_cls_prob_reshape: array whose first dimension must be 1 and
            whose ``shape[1:3]`` is (H, W). It is reshaped to
            (1, H, W, A, 2); index 1 of the last axis is used as the
            foreground score.
        rpn_bbox_pred: predicted deltas, reshaped to rows of 3 values
            (delta x, delta y, delta z).
        im_info: only ``im_info[0]`` is used; it is passed to
            ``_filter_anchors``.
        gt_bv: ground-truth boxes handed to ``valid_pred`` (presumably in
            bird-view coordinates -- TODO confirm).
        cfg_key: key into the global ``cfg`` (e.g. 'TRAIN' or 'TEST').
        _feat_stride: ``[stride_x, stride_y]`` of the feature map.

    Returns:
        Tuple ``(blob_bv, blob_3d, recall)``. Each blob is the proposals
        followed by the score, box-label and theta columns; ``recall`` is
        returned unchanged from ``valid_pred``.
    """
    beg = datetime.datetime.now()
    _anchors = generate_anchors_bv()
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    layer_cfg = cfg[cfg_key]
    pre_nms_topN = layer_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = layer_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = layer_cfg.RPN_NMS_THRESH

    # Per anchor the last axis holds two probabilities; index 1 is the
    # foreground one, which is what we rank proposals by.
    height, width = rpn_cls_prob_reshape.shape[1:3]
    scores = np.reshape(rpn_cls_prob_reshape,
                        [1, height, width, _num_anchors, 2])[..., 1]
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('rpn_bbox_pred shape : {}'.format(rpn_bbox_pred.shape))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride[0]
    shift_y = np.arange(0, height) * _feat_stride[1]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # One row of 3 regression values and one score per anchor; the rows are
    # expected to be in the same order as the anchors built above.
    bbox_deltas = bbox_deltas.reshape((-1, 3))  # delta x delta y delta z
    scores = scores.reshape((-1, 1))

    if DEBUG:
        print('anchors before filter')
        print('anchors shape: {}'.format(anchors.shape))
        print('scores shape: {}'.format(scores.shape))

    # only keep anchors inside the image
    inds_inside = _filter_anchors(anchors, im_info, allowed_border=0)
    anchors = anchors[inds_inside, :]
    scores = scores[inds_inside]
    bbox_deltas = bbox_deltas[inds_inside, :]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)
    if DEBUG:
        print('after filter')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))
        print('scores shape: {}'.format(scores.shape))

    # Steps 2 (clip to image) and 3 (min-size filter) of the classic
    # proposal layer are intentionally not applied in this variant.

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if DEBUG:
        print('proposals before nms')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))

    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh, force_cpu=False)
    if DEBUG:
        print(keep)
        print('keep.shape {}'.format(len(keep)))
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    scores = scores[keep]

    if DEBUG:
        # Surviving scores, highest first.
        print(np.sort(scores.ravel())[::-1])
        print('proposals after nms')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))

    # Output rois blob: proposals followed by score, label and theta columns.
    # The positive-overlap threshold always comes from cfg.TRAIN, regardless
    # of cfg_key.
    length = proposals_bv.shape[0]
    box_labels, thetas, recall = valid_pred(proposals_bv, gt_bv, length,
                                            cfg.TRAIN.RPN_POSITIVE_OVERLAP)
    label_col = box_labels.reshape(length, -1)
    theta_col = thetas.reshape(length, -1)
    blob_bv = np.hstack((proposals_bv.astype(np.float32, copy=False), scores,
                         label_col, theta_col))
    blob_3d = np.hstack((proposals_3d.astype(np.float32, copy=False), scores,
                         label_col, theta_col))
    end2 = datetime.datetime.now()
    if DEBUG:
        print('NMS & bbox use time: {}'.format(end2 - beg))

    return blob_bv, blob_3d, recall
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_boxes_3d,
                        im_info,
                        _feat_stride=[8, 8]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
    filter out-of-image anchors
    measure GT overlap
    label anchors (1 positive, 0 negative, -1 dont care) and subsample

    Args:
        rpn_cls_score: array whose first dimension must be 1 and whose
            ``shape[1:3]`` is (H, W) of the feature map.
        gt_boxes: ground-truth boxes passed to ``bbox_overlaps`` together
            with the inside anchors.
        gt_boxes_3d: ground-truth 3D boxes; row ``k`` must correspond to
            row ``k`` of ``gt_boxes`` because it is indexed with the argmax
            of the 2D overlaps.
        im_info: only ``im_info[0]`` is used; entry 0 is treated as the
            height and entry 1 as the width when filtering anchors.
        _feat_stride: ``[stride_x, stride_y]`` of the feature map.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, anchors, anchors_3d)``:
        labels and regression targets mapped back onto the full anchor set
        via ``_unmap`` (fill -1 / 0), plus float32 arrays of the anchors
        whose label is not -1 -- the 2D anchors prefixed with their label
        column, the 3D anchors prefixed with a zero column.
    """
    _anchors = generate_anchors_bv()
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors.shape)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Running statistics; only maintained (and only defined) when
        # DEBUG is on.
        _counts = cfg.EPS
        _sums = np.zeros((1, 6))
        _squared_sums = np.zeros((1, 6))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    im_info = im_info[0]

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height {} width {}'.format(height, width))
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))
        print('feat_stride {}'.format(_feat_stride))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride[0]
    shift_y = np.arange(0, height) * _feat_stride[1]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)
    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors: {}'.format(total_anchors))
        print('inds_inside: {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape: {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # hard negative for proposal_target_layer
        hard_negative = np.logical_and(
            0 < max_overlaps, max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)
        labels[hard_negative] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets: every inside anchor against its best-overlapping
    # ground-truth 3D box (independent of the labels).
    anchors_3d = bv_anchor_to_lidar(anchors)
    bbox_targets = _compute_targets_3d(anchors_3d,
                                       gt_boxes_3d[argmax_overlaps, :])
    if DEBUG:
        print('It is ok2')

    # NOTE(review): this unconditional relabel marks every anchor below the
    # negative-overlap threshold as background, which overrides both the
    # RPN_CLOBBER_POSITIVES choice above and the earlier negative
    # subsampling; the negatives are therefore subsampled a second time.
    # Kept as-is to preserve the existing sampling behavior -- confirm
    # whether this is intended.
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Collect the sampled (label != -1) anchors: the 2D anchors are prefixed
    # with their label, the 3D anchors with a zero column.
    all_inds = np.where(labels != -1)
    labels_new = labels[all_inds]
    zeros = np.zeros((labels_new.shape[0], 1), dtype=np.float32)
    anchors = np.hstack(
        (labels_new.reshape(-1, 1), anchors[all_inds])).astype(np.float32)
    anchors_3d = np.hstack((zeros, anchors_3d[all_inds])).astype(np.float32)

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)
        print('gt_boxes_3d: {}'.format(
            gt_boxes_3d[argmax_overlaps, :].shape))
        print('labels shape before unmap: {}'.format(labels.shape))
        print('targets shape before unmap: {}'.format(bbox_targets.shape))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))
        print('fg inds: {}'.format(fg_inds))
        print('label shape {}'.format(labels.shape))
        print('bbox_targets {}'.format(bbox_targets.shape))

    # labels
    rpn_labels = labels
    rpn_bbox_targets = bbox_targets

    if DEBUG:
        print('labels shape: {}'.format(labels.shape))
        print('targets shape: {}'.format(bbox_targets.shape))

    return rpn_labels, rpn_bbox_targets, anchors, anchors_3d
# Example no. 4
# 0
def generate_rpn(rpn_cls_prob_reshape,
                 rpn_bbox_pred,
                 im_info,
                 cfg_key,
                 _feat_stride=[8, 8]):  # for Test processing
    """Turn RPN outputs into bird-view and 3D proposals at test time.

    Anchors are generated for every feature-map cell, reduced to those
    selected by ``_filter_anchors``, shifted by the predicted deltas,
    ranked by foreground score and pruned with NMS.

    Args:
        rpn_cls_prob_reshape: array whose first dimension must be 1 and
            whose ``shape[1:3]`` is (H, W). It is reshaped to
            (1, H, W, A, 2); index 1 of the last axis is used as the
            foreground score.
        rpn_bbox_pred: predicted deltas, reshaped to rows of 3 values
            (delta x, delta y, delta z).
        im_info: only ``im_info[0]`` is used; it is passed to
            ``_filter_anchors``.
        cfg_key: key into the global ``cfg`` (e.g. 'TRAIN' or 'TEST').
        _feat_stride: ``[stride_x, stride_y]`` of the feature map.

    Returns:
        Tuple ``(blob_bv, blob_3d)``: the kept proposals followed by their
        score column and an all-zero float32 column.
    """
    test_debug = False
    start = datetime.datetime.now()
    _anchors = generate_anchors_bv()
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    layer_cfg = cfg[cfg_key]
    pre_nms_topN = layer_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = layer_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = layer_cfg.RPN_NMS_THRESH

    # Per anchor the last axis holds two probabilities; index 1 is the
    # foreground one, which is what we rank proposals by.
    height, width = rpn_cls_prob_reshape.shape[1:3]
    scores = np.reshape(rpn_cls_prob_reshape,
                        [1, height, width, _num_anchors, 2])[..., 1]
    bbox_deltas = rpn_bbox_pred

    if test_debug:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('rpn_bbox_pred shape : {}'.format(rpn_bbox_pred.shape))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors

    # Enumerate all shifts
    # TODO: replace generate anchors by load from file
    shift_x = np.arange(0, width) * _feat_stride[0]
    shift_y = np.arange(0, height) * _feat_stride[1]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    bbox_deltas = bbox_deltas.reshape((-1, 3))  # delta x delta y delta z
    scores = scores.reshape((-1, 1))

    if test_debug:
        print('anchors before filter')
        print('anchors shape: {}'.format(anchors.shape))
        print('scores shape: {}'.format(scores.shape))

    # only keep anchors inside the image
    inds_inside = _filter_anchors(anchors, im_info, allowed_border=0)
    anchors = anchors[inds_inside, :]
    scores = scores[inds_inside]
    bbox_deltas = bbox_deltas[inds_inside, :]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)
    if test_debug:
        print('after filter')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))
        print('scores shape: {}'.format(scores.shape))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if test_debug:
        print('proposals before nms')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))

    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh, force_cpu=False)
    if test_debug:
        print(keep)
        print('keep.shape {}'.format(len(keep)))
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    scores = scores[keep]

    if test_debug:
        print('proposals after nms')
        print('proposals_bv shape: {}'.format(proposals_bv.shape))
        print('proposals_3d shape: {}'.format(proposals_3d.shape))

    # Output rois blob: proposals, their score and a trailing zero column.
    length = proposals_bv.shape[0]
    zero_col = np.zeros((length, 1), dtype=np.float32)
    blob_bv = np.hstack((proposals_bv.astype(np.float32, copy=False), scores,
                         zero_col))
    blob_3d = np.hstack((proposals_3d.astype(np.float32, copy=False), scores,
                         zero_col))
    end = datetime.datetime.now()

    if test_debug:
        print('NMS & bbox use time: {}'.format(end - start))

    return blob_bv, blob_3d