Example #1
def vis_detections(lidar_bv, image, calib, bbox_pred_cnr, rpn_data, rpn_rois,
                   rcnn_roi, scores, gt_boxes_3d):
    import matplotlib.pyplot as plt
    import numpy as np
    from fast_rcnn.config import cfg  # assumed: cfg lives in this repo's fast_rcnn.config
    from utils.transform import lidar_3d_to_corners, corners_to_bv
    from fast_rcnn.bbox_transform import bbox_transform_inv_cnr
    from utils.draw import show_lidar_corners, show_image_boxes, scale_to_255
    from utils.cython_nms import nms, nms_new

    image = image.reshape((image.shape[1], image.shape[2], image.shape[3]))
    image += cfg.PIXEL_MEANS
    image = image.astype(np.uint8, copy=False)
    lidar_bv = lidar_bv.reshape(
        (lidar_bv.shape[1], lidar_bv.shape[2], lidar_bv.shape[3]))[:, :, 8]
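    # NOTE (assumption): channel 8 of the bv tensor is used as the single
    # 2-D slice to display; which map it holds (height/density/intensity)
    # depends on this repo's bv encoding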
    # visualize anchor_target_layer output
    rpn_anchors_3d = rpn_data[3][:, 1:7]
    rpn_bv = rpn_data[2][:, 1:5]
    # rpn_label = rpn_data[0]
    # print rpn_label.shape
    # print rpn_label[rpn_label==1]
    rpn_boxes_cnr = lidar_3d_to_corners(rpn_anchors_3d)
    img = show_lidar_corners(image, rpn_boxes_cnr, calib)
    img_bv = show_image_boxes(scale_to_255(lidar_bv, min=0, max=2), rpn_bv)

    print(img.shape)
    # plt.ion()
    # use a figure-level title so it survives the subplot() calls below
    plt.suptitle('anchor target layer before regression')
    plt.subplot(211)
    plt.imshow(img_bv)
    plt.subplot(212)
    plt.imshow(img)
    plt.show()

    # visualize proposal_layer output
    boxes_3d = rpn_rois[2][:, 1:7]
    boxes_bv = rpn_rois[0][:, 0:5]
    boxes_img = rpn_rois[1][:, 0:5]

    # keep = nms(boxes_img, cfg.TEST.NMS)
    # boxes_img = boxes_img[keep]
    # boxes_3d = boxes_3d[keep]
    # boxes_cnr = lidar_3d_to_corners(boxes_3d[:100])
    print(boxes_3d.shape)
    print(boxes_bv.shape)
    # image_cnr = show_lidar_corners(image, boxes_cnr, calib)

    image_bv = show_image_boxes(lidar_bv, boxes_bv[:, 1:5])
    image_img = show_image_boxes(image, boxes_img[:, 1:5])
    plt.suptitle('proposal_layer')
    plt.subplot(211)
    plt.imshow(image_bv)
    plt.subplot(212)
    plt.imshow(image_img)
    plt.show()
Example #2
def proposal_layer_3d(rpn_cls_prob_reshape,
                      rpn_bbox_pred,
                      im_info,
                      calib,
                      cfg_key,
                      _feat_stride=[8, ],
                      anchor_scales=[1.0, 1.0]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)

    _anchors = generate_anchors_bv()
    #  _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    #print "aaaaaaa",_anchors.shape (4,4)
    #print "bbbbbbb",im_info          (601,601,1)
    #print "ccccccc", calib.shape   (4,12)

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
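    # e.g. with A = 4 base anchors, rpn_cls_prob_reshape is (1, H, W, 8);
    # the [..., 1] slice keeps the 4 foreground probabilities per cell,
    # so scores comes out as (1, H, W, 4)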

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
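    # worked example: with _feat_stride = 8 and a 2 x 2 score map,
    # shifts = [[0,0,0,0], [8,0,8,0], [0,8,0,8], [8,8,8,8]] and K = 4,
    # so every base anchor is replicated at each cell offset, giving
    # K * A bv anchors in (x1, y1, x2, y2) form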

    # Reshape predicted bbox transformations to get them into the same
    # order as the anchors. The network is channels-last, so the deltas
    # already arrive as (1, H, W, 6 * A); unlike the 2D RPN's 4 values,
    # each anchor here regresses 6 values for a 3D box, giving
    # (H * W * A, 6) rows ordered by (h, w, a):
    bbox_deltas = bbox_deltas.reshape((-1, 6))

    # print "bbox_deltas",bbox_deltas.shape
    # print anchors.shape
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    scores = scores.reshape((-1, 1))

    # print np.sort(scores.ravel())[-30:]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])
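    # shape bookkeeping (a sketch): anchors (K*A, 4) -> anchors_3d (K*A, 6)
    # -> proposals_3d (K*A, 6) -> proposals_bv (K*A, 4); lidar_corners is
    # (K*A, 24) (8 corners x 3 coords) and proposals_img holds the boxes
    # projected onto the camera image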

    if DEBUG:
        # print "bbox_deltas: ", bbox_deltas[:10]
        # print "proposals number: ", proposals_3d[:10]
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape:", proposals_img.shape

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # TODO: pass real image_info
    keep = _filter_img_boxes(proposals_img, [375, 1242])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after clip"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape: ", proposals_img.shape
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after nms"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack((batch_inds, proposals_bv.astype(np.float32,
                                                         copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack((batch_inds, proposals_3d.astype(np.float32,
                                                         copy=False)))

    if DEBUG:
        print "blob shape ====================:"
        print blob_bv.shape
        print blob_img.shape
        # print '3d', blob_3d[:10]
        # print lidar_corners[:10]
        # print 'bv', blob_bv[:10]
        # print 'img', blob_img[:10]

    return blob_bv, blob_img, blob_3d
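
A minimal driver sketch for proposal_layer_3d. The shapes follow the debug
comments above (4 base anchors, a 601 x 601 bv input, stride 8); the map
size, the random tensors, and the placeholder calib are assumptions, and
cfg['TEST'] must already be configured as elsewhere in this repo.

import numpy as np

H = W = 76   # roughly 601 / 8; the true size depends on the backbone
A = 4        # base anchors from generate_anchors_bv()
rpn_cls_prob_reshape = np.random.rand(1, H, W, 2 * A).astype(np.float32)
rpn_bbox_pred = np.random.rand(1, H, W, 6 * A).astype(np.float32)
im_info = np.array([[601., 601., 1.]], dtype=np.float32)
calib = np.random.rand(4, 12).astype(np.float32)  # placeholder calibration

blob_bv, blob_img, blob_3d = proposal_layer_3d(
    rpn_cls_prob_reshape, rpn_bbox_pred, im_info, calib, 'TEST')
# each blob is (N, 1 + box_dim): column 0 is the batch index, always 0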
Example #3
def box_detect(sess, net, im, bv, calib, boxes=None):
    """Detect object classes in an lidar bv  given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        bv (ndarray): lidar bv to test
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    im_blob = im - cfg.PIXEL_MEANS
    lidar_bv_blob = bv

    im_blob = im_blob.reshape(
        (1, im_blob.shape[0], im_blob.shape[1], im_blob.shape[2]))
    lidar_bv_blob = lidar_bv_blob.reshape(
        (1, lidar_bv_blob.shape[0], lidar_bv_blob.shape[1],
         lidar_bv_blob.shape[2]))

    blobs = {'image_data': im_blob, 'lidar_bv_data': lidar_bv_blob}

    im_scales = [1]

    blobs['calib'] = calib
    bv_blob = blobs['lidar_bv_data']
    blobs['im_info'] = np.array(
        [[bv_blob.shape[1], bv_blob.shape[2], im_scales[0]]], dtype=np.float32)
    # forward pass
    feed_dict = {
        net.lidar_bv_data: blobs['lidar_bv_data'],
        net.image_data: blobs['image_data'],
        net.im_info: blobs['im_info'],
        net.calib: blobs['calib'],
        net.keep_prob: 1.0
    }

    (conv5_3, deconv, rpn_cls_prob, rpn_cls_prob_reshape,
     rpn_cls_score_reshape, rpn_cls_score, cls_score, cls_prob,
     bbox_pred_cnr, rois) = sess.run(
        [
            net.get_output('conv5_3_2'),
            net.get_output('conv5_3'),
            net.get_output('rpn_cls_prob'),
            net.get_output('rpn_cls_prob_reshape'),
            net.get_output('rpn_cls_score_reshape'),
            net.get_output('rpn_cls_score'),
            net.get_output('cls_score'),
            net.get_output('cls_prob'),
            net.get_output('bbox_pred'),
            net.get_output('rois')
        ],
        feed_dict=feed_dict)

    scores = cls_prob

    # plot featuremaps

    # print conv5_3.shape
    # # print deconv1.shape
    # activation = conv5_3
    # # featuremaps = activation.shape[3]
    # featuremaps = 48
    # plt.figure(1, figsize=(15,15))
    # for featuremap in range(featuremaps):
    #     plt.subplot(6,8, featuremap+1) # sets the number of feature maps to show on each row and column
    #     # plt.title('FeatureMap ' + str(featuremap)) # displays the feature map number
    #     plt.axis('off')
    #     plt.imshow(activation[0,:,:, featuremap], interpolation="nearest", cmap="jet")
    # plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.01, hspace=0.01)
    # plt.tight_layout(pad=0.1, h_pad=0.001, w_pad=0.001)
    # plt.show()

    # activation = deconv
    # print deconv.shape
    # # featuremaps = activation.shape[3]
    # featuremaps = 48
    # plt.figure(1, figsize=(15,15))
    # for featuremap in range(featuremaps):
    #     plt.subplot(6,8, featuremap+1) # sets the number of feature maps to show on each row and column
    #     # plt.title('FeatureMap ' + str(featuremap)) # displays the feature map number
    #     plt.axis('off')
    #     plt.imshow(activation[0,:,:, featuremap], interpolation="nearest", cmap="jet")
    # plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.01, hspace=0.01)
    # plt.tight_layout(pad=0.1, h_pad=0.001, w_pad=0.001)
    # plt.show()

    # print cls_score[np.where(cls_score[:,1] > 0)]
    # plt.hist(cls_score[:,1], bins=25)
    # plt.show()
    # plt.hist(scores[:,1], bins=25)
    # plt.show()

    assert len(im_scales) == 1, "Only single-image batch implemented"
    boxes_3d = rois[2][:, 1:7]

    # Apply bounding-box regression deltas
    box_deltas = bbox_pred_cnr
    boxes_cnr = lidar_3d_to_corners(boxes_3d)

    # img_boxes = lidar_cnr_to_img(boxes_cnr, calib[3], calib[2], calib[0])
    # img = show_image_boxes(im, img_boxes)
    # plt.imshow(img)
    # plt.show()

    # !! Important
    # corner regression is NOT applied here; the raw corners are simply
    # duplicated to fill both halves of the output
    pred_boxes_cnr = np.hstack((boxes_cnr, boxes_cnr))
    # and here corner regression IS applied
    pred_boxes_cnr_r = bbox_transform_inv_cnr(boxes_cnr, box_deltas)

    # project corners to lidar_bv
    pred_boxes_bv = corners_to_bv(pred_boxes_cnr)

    return scores, pred_boxes_bv, pred_boxes_cnr, pred_boxes_cnr_r
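
A sketch of consuming box_detect's outputs. The 0.5 threshold and the class
index are hypothetical; sess, net, im, bv, and calib come from this repo's
usual TensorFlow test setup.

scores, boxes_bv, boxes_cnr, boxes_cnr_r = box_detect(sess, net, im, bv, calib)
cls = 1                                    # e.g. the first foreground class
keep = np.where(scores[:, cls] > 0.5)[0]   # simple per-class score cut
top_bv = boxes_bv[keep]                    # bv boxes of the kept detections
top_cnr_r = boxes_cnr_r[keep]              # their regressed corner boxes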
def proposal_layer_3d_debug(rpn_cls_prob_reshape,
                            rpn_bbox_pred,
                            im_info,
                            calib,
                            cfg_in,
                            _feat_stride=[8, ],
                            anchor_scales=[1.0, 1.0],
                            debug_state=True):
    # Copied in part from proposal_layer_3d, for debugging
    _anchors = generate_anchors_bv()
    #  _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]


    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    bbox_deltas = rpn_bbox_pred

    if debug_state:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    bbox_deltas = bbox_deltas.reshape((-1, 6))
    scores = scores.reshape((-1, 1))

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)  # [x1, y1, x2, y2]

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])


    if debug_state:
        # print("bbox_deltas: ", bbox_deltas[:10])
        # print("proposals number: ", proposals_3d[:10])
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("scores shape:", scores.shape)

    # 2. clip predicted boxes to image
    # WZN: delete proposals whose anchors fall outside the image
    ind_inside = clip_anchors(anchors, im_info[:2])
    # ind_inside = np.logical_and(ind_inside, clip_anchors(proposals_bv, im_info[:2]))
    proposals_bv = proposals_bv[ind_inside, :]
    proposals_3d = proposals_3d[ind_inside, :]
    proposals_img = proposals_img[ind_inside, :]
    scores = scores[ind_inside, :]
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])
    
    
    # TODO: pass real image_info
    #keep = _filter_img_boxes(proposals_img, [375, 1242])
    #proposals_bv = proposals_bv[keep, :]
    #proposals_3d = proposals_3d[keep, :]
    #proposals_img = proposals_img[keep, :]
    #scores = scores[keep]

    if debug_state:
        print("proposals after clip")
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("proposals_img shape: ", proposals_img.shape)
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if cfg_in['pre_keep_topN'] > 0:
        order = order[:cfg_in['pre_keep_topN']]
    #keep = keep[order]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if cfg_in['use_nms']:
        keep = nms(np.hstack((proposals_bv, scores)), cfg_in['nms_thresh'])
        if cfg_in['nms_topN'] > 0:
            keep = keep[:cfg_in['nms_topN']]
        proposals_bv = proposals_bv[keep, :]
        proposals_3d = proposals_3d[keep, :]
        proposals_img = proposals_img[keep, :]
        scores = scores[keep]

        if debug_state:
            print("proposals after nms")
            print("proposals_bv shape: ", proposals_bv.shape)
            print("proposals_3d shape: ", proposals_3d.shape)

    # debug only: keep proposals whose score exceeds a threshold
    if cfg_in['prob_thresh']:
        keep_ind = scores[:, 0] > cfg_in['prob_thresh']
        print('scores: ', scores)
        print('threshold: ', cfg_in['prob_thresh'])
        print('score shape:', scores.shape)
        #print keep_ind.shape
        #print keep.shape
        #keep = keep[keep_ind]
        proposals_bv = proposals_bv[keep_ind, :]
        proposals_3d = proposals_3d[keep_ind, :]
        proposals_img = proposals_img[keep_ind, :]
        scores = scores[keep_ind]

    return proposals_bv, proposals_3d, proposals_img, scores
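
A plausible cfg_in for the debug layer above: the keys are exactly the ones
it reads, and the values echo the "e.g." numbers in the step comments
(prob_thresh is an arbitrary pick).

cfg_in = {'pre_keep_topN': 6000,   # step 5: top-N kept before NMS
          'use_nms': True,
          'nms_thresh': 0.7,       # step 6
          'nms_topN': 300,         # step 7
          'prob_thresh': 0.5}      # debug-only score cut-off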
def _sample_rois_3d(all_rois_bv, all_rois_3d, gt_boxes_bv, gt_boxes_corners,
                    fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)

    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois_bv[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes_bv[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes_bv[gt_assignment, 4]

    if DEBUG:
        print("overlaps: ", overlaps.shape)
        print("gt assignment: ", gt_assignment.shape)
        print("max_overlaps: ", max_overlaps.shape)
        print("labels: ", labels.shape)

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    if DEBUG:
        print("fg_inds: ", fg_inds.shape)
        # print("fg_rois_per_image: ", fg_rois_per_image)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
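    # e.g. (hypothetical numbers) with rois_per_image = 128 and
    # fg_rois_per_image = 32: if only 10 RoIs clear FG_THRESH, then
    # 10 foreground and 118 background RoIs are sampled below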
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)

    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0


    rois_bv = all_rois_bv[keep_inds]
    rois_3d = all_rois_3d[keep_inds]

    # convert 3d to corners
    rois_cnr = lidar_3d_to_corners(rois_3d[:,1:7])
    rois_cnr = np.hstack((rois_3d[:,0].reshape(-1,1), rois_cnr))

    if DEBUG:
        print("labels shape: ", labels.shape)
        print("keep_inds: ", keep_inds)
        print("all_rois_bv shape: ", all_rois_bv.shape)
        print("rois_3d shape: ", rois_3d.shape)
        print("rois_cnr shape: ", rois_cnr.shape)

    bbox_target_data = _compute_targets_cnr(
        rois_cnr[:, 1:25], gt_boxes_corners[gt_assignment[keep_inds], :24], labels)
    bbox_targets = \
        _get_bbox_regression_labels_3d(bbox_target_data, num_classes)
    return labels, rois_bv, rois_cnr, rois_3d, bbox_targets
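# Return shapes, inferred from the code above: labels (N,); rois_bv (N, 5),
# rois_cnr (N, 25) and rois_3d (N, 7), each keeping its leading batch-index
# column; bbox_targets (N, 24 * num_classes).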