def proposal_layer_3d(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, calib, cfg_key,
                      _feat_stride=[8, ], anchor_scales=[1.0, 1.0]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors_bv()
    # _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    # Typical shapes: _anchors (4, 4), im_info (601, 601, 1), calib (4, 12)

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # The first set of _num_anchors channels are bg probs;
    # the second set are the fg probs, which we want.
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    height, width = rpn_cls_prob_reshape.shape[1:3]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Reshape predicted bbox transformations to the same order as the
    # anchors. The deltas already come in (1, H, W, 6 * A) layout, so a
    # plain reshape yields (1 * H * W * A, 6) rows ordered by (h, w, a),
    # slowest to fastest.
    bbox_deltas = bbox_deltas.reshape((-1, 6))

    # Same story for the scores: (1, H, W, A) -> (1 * H * W * A, 1)
    scores = scores.reshape((-1, 1))

    # Convert bird's-eye-view anchors to 3D anchors
    anchors_3d = bv_anchor_to_lidar(anchors)

    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)

    # Convert back to lidar bird's-eye view
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners,
                                     calib[3], calib[2], calib[0])

    if DEBUG:
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape:", proposals_img.shape

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # TODO: pass real image_info
    keep = _filter_img_boxes(proposals_img, [375, 1242])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after clip"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape: ", proposals_img.shape

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after nms"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape

    # Output rois blobs. Our RPN implementation only supports a single
    # input image, so all batch inds are 0.
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack(
        (batch_inds, proposals_bv.astype(np.float32, copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack(
        (batch_inds, proposals_3d.astype(np.float32, copy=False)))

    if DEBUG:
        print "blob shape ====================:"
        print blob_bv.shape
        print blob_img.shape

    return blob_bv, blob_img, blob_3d
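

# A minimal, self-contained sketch of the anchor enumeration used above:
# A base anchors broadcast against K grid shifts yield K*A boxes ordered
# by (h, w, a). The values here (toy 2x3 feature map, 2 anchors) are
# illustrative only, not what generate_anchors_bv() actually produces.
def _demo_anchor_enumeration():
    import numpy as np
    feat_stride = 8
    height, width = 2, 3
    base_anchors = np.array([[-8., -8., 8., 8.],     # toy anchor 1
                             [-16., -8., 16., 8.]])  # toy anchor 2
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A, K = base_anchors.shape[0], shifts.shape[0]
    # (K, 1, 4) + (1, A, 4) broadcasts to (K, A, 4)
    anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    anchors = anchors.reshape((K * A, 4))
    assert anchors.shape == (K * A, 4)  # (2*3*2, 4) == (12, 4)
    return anchors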
def proposal_layer_3d_debug(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, calib,
                            cfg_in, _feat_stride=[8, ],
                            anchor_scales=[1.0, 1.0], debug_state=True):
    # Copy of part of proposal_layer_3d, kept separate for debugging.
    _anchors = generate_anchors_bv()
    # _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # The first set of _num_anchors channels are bg probs;
    # the second set are the fg probs, which we want.
    height, width = rpn_cls_prob_reshape.shape[1:3]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
    bbox_deltas = rpn_bbox_pred

    if debug_state:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    bbox_deltas = bbox_deltas.reshape((-1, 6))
    scores = scores.reshape((-1, 1))

    # Convert bird's-eye-view anchors to 3D anchors
    anchors_3d = bv_anchor_to_lidar(anchors)

    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)

    # Convert back to lidar bird's-eye view [x1, y1, x2, y2]
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners,
                                     calib[3], calib[2], calib[0])

    if debug_state:
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("scores shape:", scores.shape)

    # 2. clip predicted boxes to image
    # WZN: delete anchors that are not inside the image
    ind_inside = clip_anchors(anchors, im_info[:2])
    # ind_inside = np.logical_and(ind_inside, clip_anchors(proposals_bv, im_info[:2]))
    proposals_bv = proposals_bv[ind_inside, :]
    proposals_3d = proposals_3d[ind_inside, :]
    proposals_img = proposals_img[ind_inside, :]
    scores = scores[ind_inside, :]

    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # TODO: pass real image_info
    # keep = _filter_img_boxes(proposals_img, [375, 1242])
    # proposals_bv = proposals_bv[keep, :]
    # proposals_3d = proposals_3d[keep, :]
    # proposals_img = proposals_img[keep, :]
    # scores = scores[keep]

    if debug_state:
        print("proposals after clip")
        print("proposals_bv shape: ", proposals_bv.shape)
        print("proposals_3d shape: ", proposals_3d.shape)
        print("proposals_img shape: ", proposals_img.shape)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_keep_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if cfg_in['pre_keep_topN'] > 0:
        order = order[:cfg_in['pre_keep_topN']]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if cfg_in['use_nms']:
        keep = nms(np.hstack((proposals_bv, scores)), cfg_in['nms_thresh'])
        if cfg_in['nms_topN'] > 0:
            keep = keep[:cfg_in['nms_topN']]
        proposals_bv = proposals_bv[keep, :]
        proposals_3d = proposals_3d[keep, :]
        proposals_img = proposals_img[keep, :]
        scores = scores[keep]

        if debug_state:
            print("proposals after nms")
            print("proposals_bv shape: ", proposals_bv.shape)
            print("proposals_3d shape: ", proposals_3d.shape)

    # debug only: keep proposals whose scores exceed a threshold
    if cfg_in['prob_thresh']:
        keep_ind = scores[:, 0] > cfg_in['prob_thresh']
        if debug_state:
            print('scores: ', scores)
            print('threshold: ', cfg_in['prob_thresh'])
            print('score shape:', scores.shape)
        proposals_bv = proposals_bv[keep_ind, :]
        proposals_3d = proposals_3d[keep_ind, :]
        proposals_img = proposals_img[keep_ind, :]
        scores = scores[keep_ind]

    return proposals_bv, proposals_3d, proposals_img, scores
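

# Example cfg_in for proposal_layer_3d_debug. The keys match the ones the
# function reads above; the numeric values are illustrative assumptions
# (mirroring the usual RPN defaults), not values taken from this repo's
# config:
#
#   cfg_in = {'pre_keep_topN': 6000,   # keep top-N by score before NMS
#             'use_nms': True,
#             'nms_thresh': 0.7,       # bird's-eye-view IoU threshold
#             'nms_topN': 300,         # keep top-N after NMS
#             'prob_thresh': 0.5}      # final score cutoff (0 disables)
#   bv, p3d, pimg, s = proposal_layer_3d_debug(
#       rpn_cls_prob_reshape, rpn_bbox_pred, im_info, calib, cfg_in)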
def test_net(sess, net, imdb, weights_filename, max_per_image=300,
             thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)

    # All detections are collected into:
    #   all_boxes[cls][image]     = N x 5 array of detections in
    #                               (x1, y1, x2, y2, score)
    #   all_boxes_cnr[cls][image] = N x 25 array of detections in
    #                               (x0-x7, y0-y7, z0-z7, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_boxes_img = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
    all_boxes_cnr = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None

        im = cv2.imread(imdb.image_path_at(i))
        bv = np.load(imdb.lidar_path_at(i))
        calib = imdb.calib_at(i)
        print "Inference: ", imdb.lidar_path_at(i)

        _t['im_detect'].tic()
        scores, boxes_bv, boxes_cnr, boxes_cnr_r = box_detect(
            sess, net, im, bv, calib, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        thresh = 0.05  # NOTE: overrides the thresh argument
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes_bv[inds, j * 4:(j + 1) * 4]
            cls_boxes_cnr = boxes_cnr[inds, j * 24:(j + 1) * 24]
            cls_boxes_cnr_r = boxes_cnr_r[inds, j * 24:(j + 1) * 24]

            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr = np.hstack((cls_boxes_cnr, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr_r = np.hstack((cls_boxes_cnr_r, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            cls_dets_cnr = cls_dets_cnr[keep, :]
            cls_dets_cnr_r = cls_dets_cnr_r[keep, :]
            cls_scores = cls_scores[keep]

            # project to image
            if np.any(cls_dets_cnr):
                plt.rcParams['figure.figsize'] = (10, 10)
                img_boxes = lidar_cnr_to_img(cls_dets_cnr[:, :24],
                                             calib[3], calib[2], calib[0])
                img = show_image_boxes(im, img_boxes)
                plt.imshow(img)
                # plt.show()
                print cls_dets_cnr.shape

                image_bv = show_image_boxes(
                    scale_to_255(bv[:, :, 8], min=0, max=2), cls_dets[:, :4])
                image_cnr = show_lidar_corners(im, cls_dets_cnr[:, :24], calib)

                if 1:
                    import mayavi.mlab as mlab

                    filename = os.path.join(imdb.lidar_path_at(i)[:-19],
                                            'velodyne',
                                            str(i).zfill(6) + '.bin')
                    print filename
                    scan = np.fromfile(filename, dtype=np.float32)
                    scan = scan.reshape((-1, 4))

                    corners = cls_dets_cnr[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    corners_r = cls_dets_cnr_r[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    print corners_r

                    fig = mlab.figure(figure=None, bgcolor=(0, 0, 0),
                                      fgcolor=None, engine=None,
                                      size=(1000, 500))
                    draw_lidar(scan, fig=fig)
                    draw_gt_boxes3d(corners, fig=fig)
                    draw_gt_boxes3d(corners_r, color=(1, 0, 1), fig=fig)
                    mlab.show()

                plt.subplot(211)
                plt.title('bv proposal')
                plt.imshow(image_bv, cmap='jet')
                plt.subplot(212)
                plt.imshow(image_cnr)
                plt.show()

            all_boxes[j][i] = cls_dets
            # all_boxes_img[j][i] = cls_des_img
            all_boxes_cnr[j][i] = cls_dets_cnr

        if vis:
            plt.show()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
                    # all_boxes_img[j][i] = all_boxes_img[j][i][keep, :]
                    all_boxes_cnr[j][i] = all_boxes_cnr[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
    det_cnr_file = os.path.join(output_dir, 'detections_cnr.pkl')
    with open(det_cnr_file, 'wb') as f:
        cPickle.dump(all_boxes_cnr, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, all_boxes_cnr, output_dir)
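

# The pickle files written by test_net can be read back for offline
# inspection, e.g. (sketch; output_dir comes from get_output_dir as above):
#
#   with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
#       all_boxes = cPickle.load(f)   # all_boxes[cls][image] -> N x 5 array
#   with open(os.path.join(output_dir, 'detections_cnr.pkl'), 'rb') as f:
#       all_boxes_cnr = cPickle.load(f)  # N x 25 corner detections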
def proposal_target_layer_3d(rpn_rois_bv, rpn_rois_3d, gt_boxes_bv,
                             gt_boxes_3d, gt_boxes_corners, calib,
                             _num_classes):
    """
    Assign object detection proposals to ground-truth targets.

    Produces proposal classification labels and bounding-box regression
    targets.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    all_rois = rpn_rois_bv

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes_bv.shape[0], 1), dtype=gt_boxes_bv.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes_bv[:, :-1]))))
    all_rois_3d = np.vstack(
        (rpn_rois_3d, np.hstack((zeros, gt_boxes_3d[:, :-1]))))

    if DEBUG:
        print "rpn rois 3d shape: ", rpn_rois_3d.shape
        print "all_rois bv shape: ", all_rois.shape
        print "all_rois_3d shape: ", all_rois_3d.shape

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
        'Only single item batches are supported'

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois_bv, rois_cnr, rois_3d, bbox_targets = _sample_rois_3d(
        all_rois, all_rois_3d, gt_boxes_bv, gt_boxes_corners,
        fg_rois_per_image, rois_per_image, _num_classes)

    if DEBUG:
        print "labels shape: ", labels.shape
        print "rois_bv shape: ", rois_bv.shape
        print "rois_3d shape: ", rois_3d.shape
        print "rois_cnr shape: ", rois_cnr.shape

    rois_img = lidar_cnr_to_img(rois_cnr[:, 1:25],
                                calib[3], calib[2], calib[0])
    rois_img = np.hstack((rois_bv[:, 0].reshape(-1, 1), rois_img))

    if DEBUG:
        print "after sample"
        print labels.shape
        print 'num fg: {}'.format((labels > 0).sum())
        print 'num bg: {}'.format((labels == 0).sum())
        print 'rois_bv shape: ', rois_bv.shape
        print 'rois_3d shape: ', rois_3d.shape
        print 'bbox_targets shape: ', bbox_targets.shape

    rois_bv = rois_bv.reshape(-1, 5).astype(np.float32)
    rois_img = rois_img.reshape(-1, 5).astype(np.float32)
    rois_3d = rois_3d.reshape(-1, 7).astype(np.float32)
    labels = labels.reshape(-1, 1).astype(np.int32)
    bbox_targets = bbox_targets.reshape(
        -1, _num_classes * 24).astype(np.float32)

    return rois_bv, rois_img, labels, bbox_targets, rois_3d
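

# Shape summary of the blobs returned by proposal_target_layer_3d, as
# implied by the reshapes above (N sampled ROIs, C = _num_classes). The
# column orderings beyond the batch index are inferred from the
# surrounding conversions and should be treated as assumptions:
#   rois_bv, rois_img : (N, 5)      -> (batch_ind, x1, y1, x2, y2)
#   rois_3d           : (N, 7)      -> batch_ind plus the six 3D box
#                                      parameters from bbox_transform_inv_3d
#   labels            : (N, 1)      int32 class indices (0 = background)
#   bbox_targets      : (N, 24 * C) per-class corner regression targets
#                                      (8 corners x 3 coordinates)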