def project_img_to_point_cloud(points, image, calib_dir, img_idx):
    """
    Projects image colours to point cloud points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image (rows by cols by [r,g,b]): colour values in image space.
            It is indexed as image[y, x], i.e. [row, column].
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours
            correspond to the indices of the points in the
            'points' argument

    Note:
        Points are not filtered against the image bounds here. With NumPy
        indexing a negative pixel coordinate wraps around and a coordinate
        past the image edge raises IndexError, so callers should pass only
        points that project inside the image.
    """
    frame_calib = calib.read_calibration(calib_dir, img_idx)

    # Project every point with the P2 matrix of the frame's calibration
    point_in_im = calib.project_to_image(points.T, p=frame_calib.p2).T

    # Floor to integer pixel coordinates (column 0 = x, column 1 = y)
    pixels = np.floor(point_in_im).astype(np.int32)

    # Single fancy-indexing lookup instead of one Python-level lookup per
    # point; row i of the result is the colour for points[i].
    return np.asanyarray(image)[pixels[:, 1], pixels[:, 0], :]
def main():
    """Time sample loading and BEV map creation over the first 200 samples.

    Builds the KITTI dataset from the vanilla cars pipeline config, then for
    each sample index loads the image, calibration, point cloud and ground
    plane (timed as "load") and creates the BEV maps (timed as "BEV").
    The mean and median of both timings are printed at the end.
    """
    config_path = mlod.root_dir() + \
        '/data/configs/official/cars/cars_000_vanilla.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            config_path, is_training=True)

    # Use the trainval split from the training directory, without labels
    dataset_config.data_split = 'trainval'
    dataset_config.data_split_dir = 'training'
    dataset_config.has_labels = False

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)
    kitti_utils = dataset.kitti_utils

    bev_source = 'lidar'
    num_samples = 200

    all_load_times = []
    all_bev_times = []
    for sample_idx in range(num_samples):
        sys.stdout.write('\rSample {} / {}'.format(sample_idx,
                                                   num_samples - 1))
        img_idx = sample_idx
        sample_name = '{:06d}'.format(sample_idx)

        # --- Loading phase ---
        load_started = time.time()

        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        image_shape = image.shape[0:2]

        # The calibration result is not used further; the read is kept so
        # that it is still part of the measured loading time.
        calib_utils.read_calibration(dataset.calib_dir, img_idx)

        point_cloud = kitti_utils.get_point_cloud(bev_source,
                                                  int(sample_name),
                                                  image_shape)
        ground_plane = kitti_utils.get_ground_plane(sample_name)

        all_load_times.append(time.time() - load_started)

        # --- BEV phase ---
        bev_started = time.time()
        kitti_utils.create_bev_maps(point_cloud, ground_plane)
        bev_finished = time.time()
        all_bev_times.append(bev_finished - bev_started)

    print('')
    print('Load mean:', np.mean(all_load_times))
    print('Load median:', np.median(all_load_times))
    print('BEV mean:', np.mean(all_bev_times))
    print('BEV median:', np.median(all_bev_times))
def load_samples(self, indices):
    """Load the input data for a set of samples.

    Args:
        indices: indices into self.sample_list of the samples to load

    Returns:
        A list with one dict per sample, holding the RGB image, the lidar
        point cloud, the label classes / 2D boxes / 3D boxes and the P2
        calibration matrix under the corresponding `constants.KEY_*` keys.
        When the dataset has no labels, zero-filled placeholders are used
        for the label entries.
    """
    sample_dicts = []

    for idx in indices:
        sample = self.sample_list[idx]
        sample_id = int(sample.name)

        # Labels (zero placeholders when the dataset has none)
        if not self.has_labels:
            label_classes = np.zeros(1)
            label_boxes_2d = np.zeros((1, 4))
            label_boxes_3d = np.zeros((1, 7))
        else:
            obj_labels = obj_utils.read_labels(self.label_dir, sample_id)
            label_classes, label_boxes_3d, label_boxes_2d = \
                self.parse_obj_labels(obj_labels, self.label_map)

        # Image: OpenCV reads BGR, reverse the channel axis to get RGB
        # NOTE(review): the method name is spelled `get_rbg_image_path`
        # here - confirm this matches the class definition.
        bgr_image = cv2.imread(self.get_rbg_image_path(sample_id))
        image_input = bgr_image[..., ::-1]
        im_shape = image_input.shape[0:2]

        # Calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, sample_id).p2

        # Point cloud in the camera frame, filtered with the image size
        # NOTE(review): im_shape is (rows, cols) taken from the array
        # shape - confirm that this is the order
        # obj_utils.get_lidar_point_cloud expects for im_size.
        point_cloud = obj_utils.get_lidar_point_cloud(sample_id,
                                                      self.calib_dir,
                                                      self.velo_dir,
                                                      im_size=im_shape)

        #################################
        # Data Augmentation
        #################################
        # Both augmentations are recognised but not implemented here
        if kitti_aug.AUG_FLIPPING in sample.augs:
            pass
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            pass

        sample_dicts.append({
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
        })

    return sample_dicts
def get_lidar_point_cloud_with_color(img_idx, img_dir, calib_dir, velo_dir,
                                     im_size=None):
    """ Calculates the lidar point cloud in the camera frame and, when an
    image size is given, keeps only the points that project into the image
    and looks up the image colour under each of them.

    :param img_idx: image index
    :param img_dir: directory with the images, named '<6-digit idx>.png'
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]

    :return: if im_size is not given, only the (3, N) point_cloud in the
             form [[x,...][y,...][z,...]] (no colours are returned).
             Otherwise a tuple (point_cloud, point_colors): the filtered
             (3, M) point cloud and the transposed array of pixel values
             sampled at each kept point.
    """
    # Read calibration info
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, _ = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Calculate the point cloud
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # Without an image size there is nothing to filter or sample
    if not im_size:
        return pts.T

    # Only keep points in front of camera (positive z)
    pts = pts[pts[:, 2] > 0]

    # Project to image frame
    point_in_im = calib_utils.project_to_image(pts.T, p=frame_calib.p2).T

    # Filter based on the given image size
    image_filter = (point_in_im[:, 0] > 0) & \
                   (point_in_im[:, 0] < im_size[0]) & \
                   (point_in_im[:, 1] > 0) & \
                   (point_in_im[:, 1] < im_size[1])

    # Load the pixels and release the file handle straight away
    image_path = img_dir + "/%06d.png" % img_idx
    with Image.open(image_path) as image_file:
        img = np.array(image_file)

    # `np.int` was removed in NumPy 1.24; the builtin int is the documented
    # replacement and gives the same truncation to integer pixel indices.
    rows = point_in_im[image_filter, 1].astype(int)
    cols = point_in_im[image_filter, 0].astype(int)
    point_colors = img[rows, cols]

    return pts[image_filter].T, point_colors.T
def test_depth_from_disparity(self):
    """Smoke test: depth_from_disparity runs on the test data without errors.

    No values are checked; the test only fails if one of the calls raises
    or if the result cannot be unpacked into three components.
    """
    img_idx = 1
    test_data_root = ROOTDIR + '/tests/test_data'

    disp = calib.read_disparity(test_data_root, img_idx)
    frame_calib = calib.read_calibration(test_data_root + '/calib', img_idx)

    stereo_info = calib.get_stereo_calibration(frame_calib.p2,
                                               frame_calib.p3)

    x, y, z = calib.depth_from_disparity(disp, stereo_info)
def get_lidar_point_cloud(img_idx, calib_dir, velo_dir,
                          im_size=None, min_intensity=None):
    """ Calculates the lidar point cloud, and optionally returns only the
    points that are projected to the image.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
                      to filter the point cloud [w, h]. When omitted, all
                      points are returned and min_intensity is ignored.
    :param min_intensity: (optional) minimum intensity required to keep a
                      point. A falsy value (None or 0) disables the filter.

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Read the calibration file for this frame
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)

    # Read the x, y, z coordinates and the intensity of every lidar point
    x, y, z, i = calib_utils.read_lidar(velo_dir=velo_dir, img_idx=img_idx)

    # Stack the coordinates to (N, 3) and transform to the camera frame
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # The given image is assumed to be a 2D image
    if not im_size:
        return pts.T

    # Only keep points in front of camera (positive z in the camera frame).
    # The mask is kept so the intensities can be reduced the same way.
    in_front = pts[:, 2] > 0
    pts = pts[in_front]

    # Project to image frame (pixel coordinates)
    point_in_im = calib_utils.project_to_image(pts.T, p=frame_calib.p2).T

    # Filter based on the given image size; the mask indexes `pts`, whose
    # coordinates stay in the camera frame
    image_filter = (point_in_im[:, 0] > 0) & \
                   (point_in_im[:, 0] < im_size[0]) & \
                   (point_in_im[:, 1] > 0) & \
                   (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    # `i` holds one intensity per *original* lidar point, while
    # image_filter has one entry per point in front of the camera. Reduce
    # the intensities with the same mask so both filters line up.
    # Assumes lidar_to_cam_frame keeps the number and order of points -
    # TODO confirm.
    intensity = np.asarray(i)[in_front]
    point_filter = np.logical_and(image_filter, intensity > min_intensity)
    return pts[point_filter].T
def test_read_calibration(self):
    """Compare read_calibration output for 724513.txt with readcalib.mat."""
    test_data_dir = ROOTDIR + "/tests/test_data/"

    # The test file used for calibration is 724513.txt
    calib_out = calib.read_calibration(test_data_dir + "calib/", 724513)
    calib_true = scipy.io.loadmat(test_data_dir + 'readcalib.mat')

    # (attribute on the calibration object, key in the reference .mat file)
    attribute_to_key = (
        ('p0', 'p0'),
        ('p1', 'p1'),
        ('p2', 'p2'),
        ('p3', 'p3'),
        ('r0_rect', 'r0_rect'),
        ('tr_velodyne_to_cam', 'tr_velo_to_cam'),
    )
    for attribute, key in attribute_to_key:
        np.testing.assert_almost_equal(getattr(calib_out, attribute),
                                       calib_true[key])
def test_project_to_image_space_tensors(self):
    """Check that the TensorFlow anchor projection matches the NumPy one.

    Two anchors are projected into image space with both
    anchor_projector.project_to_image_space and its tensor counterpart;
    the corners and the normalized corners must agree within 1e-4.
    """
    anchors = np.asarray([[0, 0, 3, 2, 0, 6],
                          [3, 0, 3, 2, 0, 2]],
                         dtype=np.float64)
    img_idx = int('000217')
    img_shape = [375, 1242]

    dataset_config = DatasetBuilder.copy_config(
        DatasetBuilder.KITTI_UNITTEST)
    dataset_config.data_split = 'train'
    dataset_config.dataset_dir = tests.test_path() + \
        "/datasets/Kitti/object"

    dataset = DatasetBuilder().build_kitti_dataset(dataset_config)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    # Project the 3D points in numpy space
    img_corners, img_corners_norm = anchor_projector.project_to_image_space(
        anchors, stereo_calib_p2, img_shape)

    # convert the required params to tensors
    tf_stereo_calib_p2 = tf.convert_to_tensor(stereo_calib_p2,
                                              dtype=tf.float32)
    tf_anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
    tf_img_shape = tf.convert_to_tensor(img_shape, dtype=tf.float32)

    # Project the 3D points in tensor space
    img_corners_tensor, img_corners_norm_tensor = \
        anchor_projector.tf_project_to_image_space(tf_anchors,
                                                   tf_stereo_calib_p2,
                                                   tf_img_shape)

    # Entering the session makes it the default one for Tensor.eval() and,
    # unlike `sess.as_default()`, also closes it on exit so its resources
    # are released.
    with tf.Session():
        img_corners_out = img_corners_tensor.eval()
        img_corners_norm_out = img_corners_norm_tensor.eval()

    np.testing.assert_allclose(img_corners,
                               img_corners_out,
                               atol=1e-04,
                               err_msg='Incorrect corner projection')
    np.testing.assert_allclose(
        img_corners_norm,
        img_corners_norm_out,
        atol=1e-04,
        err_msg='Incorrect normalized corner projection')
def _project_and_show(self, sample_name, point_cloud, color, title):
    """Project a point cloud to pixel coordinates and show it on the image.

    Every projected point is painted onto the sample's RGB image with the
    given colour and the result is displayed. The `title` argument is
    accepted but not used.
    """
    img_idx = int(sample_name)

    # np.array copies the image data by default (np.asarray only copies
    # when necessary), so painting below does not touch the loaded image.
    img_array = np.array(
        Image.open(self.dataset.get_rgb_image_path(sample_name)))

    # Read the calibration file for this frame
    frame_calib = calib_utils.read_calibration(self.dataset.calib_dir,
                                               img_idx)

    projected = calib_utils.project_to_image(point_cloud,
                                             p=frame_calib.p2).T

    # Swap the columns to (row, col) order and truncate to integer pixels
    row_col = projected[:, [1, 0]].astype(int)

    # Paired row/column index arrays address one pixel per point
    # (behaves like zipping the two index arrays)
    img_array[row_col[:, 0], row_col[:, 1], :] = ImageColor.getrgb(color)

    Image.fromarray(img_array).show()
def test_read_lidar(self):
    """Check read_lidar and lidar_to_cam_frame against reference .mat data."""
    test_data_dir = ROOTDIR + "/tests/test_data/calib"

    # Raw lidar points must match the reference xyz coordinates
    reference = scipy.io.loadmat(test_data_dir + '/test_velo.mat')
    velo_true = reference['current_frame']['xyz_velodyne'][0][0][:, 0:3]

    x, y, z, i = calib.read_lidar(velo_dir=test_data_dir, img_idx=0)
    velo_test = np.vstack((x, y, z)).T

    np.testing.assert_almost_equal(velo_true, velo_test, decimal=5,
                                   verbose=True)

    # The points transformed to the camera frame must match as well
    reference_tf = scipy.io.loadmat(test_data_dir + '/test_velo_tf.mat')
    velo_true_tf = reference_tf['velo_cam_frame']

    calib_out = calib.read_calibration(test_data_dir, 0)
    xyz_cam = calib.lidar_to_cam_frame(velo_test, calib_out)

    np.testing.assert_almost_equal(velo_true_tf, xyz_cam, decimal=5,
                                   verbose=True)
def get_depth_map_point_cloud(img_idx, calib_dir, depth_dir, im_size):
    """ Calculates the point cloud from a depth map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param depth_dir: directory with depth maps
    :param im_size: size of the image. As used below, im_size[0] is the
        extent along x (pixel columns) and im_size[1] the extent along y
        (pixel rows), i.e. [w, h].
    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    depth_map = depth_map_utils.get_depth_map(img_idx, depth_dir)

    # Stereo calibration derived from the P2 / P3 matrices of the frame
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_info = calib.get_stereo_calibration(frame_calib.p2,
                                               frame_calib.p3)

    # Depth of every pixel, flattened in row-major order
    z = depth_map.flatten()

    # Pixel coordinate grids, counted from 1
    # NOTE(review): the coordinates start at 1 rather than 0 - confirm
    # this offset is intended.
    x_extent, y_extent = im_size[0], im_size[1]
    grid_x, grid_y = np.meshgrid(np.arange(1, x_extent + 1, 1),
                                 np.arange(1, y_extent + 1, 1))

    # Offsets from the principal point
    offset_u = grid_x.flatten() - stereo_info.center_u
    offset_v = grid_y.flatten() - stereo_info.center_v

    # Back-project: x = (u - cu) * z / f, y = (v - cv) * z / f
    depth_over_f = np.divide(z, stereo_info.f)
    x = np.multiply(offset_u, depth_over_f)
    y = np.multiply(offset_v, depth_over_f)

    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -stereo_info.p[0, 3] / stereo_info.f

    point_cloud = np.asarray([x + x_offset, y, z])

    # Filter points to image frame
    point_in_im = calib.project_to_image(point_cloud, p=frame_calib.p2).T
    in_x_range = (point_in_im[:, 0] > 0) & (point_in_im[:, 0] < im_size[0])
    in_y_range = (point_in_im[:, 1] > 0) & (point_in_im[:, 1] < im_size[1])
    image_filter = in_x_range & in_y_range

    return point_cloud.T[image_filter].T
def create_framecalib(from_pandora=True):
    """Return the frame calibration to use.

    With from_pandora=True a FrameCalibrationData is filled with fixed
    p2, tr_velodyne_to_cam and r0_rect matrices; otherwise the calibration
    of KITTI testing sample 1 is read from disk.
    """
    if not from_pandora:
        # This is the correct form if testing with kitti data
        return calib_utils.read_calibration(
            "/notebooks/DATA/Kitti/object/testing/calib", 1)

    # These values are collected from the ROS calibration matrix
    p2_values = [
        1275.28898946, 0.0, 622.0, 0.0,
        0.0, 725.783914414, 185.0, 0.0,
        0.0, 0.0, 1.0, 0.0
    ]
    velo_to_cam_values = [
        0, -1, 0, 0,
        0, 0, -1, 0,
        1, 0, 0, 0
    ]
    r0_values = [
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0
    ]

    frame_calib = FrameCalibrationData()
    frame_calib.p2 = np.reshape(p2_values, (3, 4))
    frame_calib.tr_velodyne_to_cam = np.reshape(velo_to_cam_values, (3, 4))
    frame_calib.r0_rect = np.reshape(r0_values, (3, 3))
    return frame_calib
def main():
    """KITTI wrapper demo: draw the labelled 2D and 3D boxes of one sample.

    An image index is drawn at random, the image is visualised and every
    annotated object of that sample is drawn as a 2D box on the first axes
    and as a projected 3D box on the second axes.
    """
    print('=== Python Kitti Wrapper Demo ===')

    # Paths
    data_set = 'training'
    cam = 2
    root_dir = '/notebooks/DATA/Kitti/object/'
    split_dir = os.path.join(root_dir, data_set)

    image_dir = split_dir + '/image_' + str(cam)
    label_dir = split_dir + '/label_' + str(cam)
    calib_dir = split_dir + '/calib'

    img_idx = int(random.random() * 100)
    print('img_idx', img_idx)

    # Set up the figure with the image
    f, ax1, ax2 = vis_utils.visualization(image_dir, img_idx)

    # Projection matrix used for drawing the 3D boxes
    p = calib_utils.read_calibration(calib_dir, img_idx).p2

    # Draw 2D and 3D boxes for all annotated objects
    for obj in obj_utils.read_labels(label_dir, img_idx):
        vis_utils.draw_box_2d(ax1, obj)
        vis_utils.draw_box_3d(ax2, obj, p)

    # Render results
    plt.draw()
    plt.show()
def project_flipped_img_to_point_cloud(points, image_flipped, calib_dir,
                                       img_idx):
    """
    Projects image colours of a horizontally flipped image to point cloud
    points

    Arguments:
        points (N by [x,y,z]): list of points where N is
            the number of points
        image_flipped (Y by X by [r,g,b]): colour values in image space.
            It is indexed as image_flipped[y, x].
        calib_dir (str): calibration directory
        img_idx (int): index of the requested image

    Returns:
        [N by [r,g,b]]: Matrix of colour codes. Indices of colours
            correspond to the indices of the points in the
            'points' argument

    Note:
        Points are not filtered against the image bounds here. With NumPy
        indexing a negative pixel coordinate wraps around and a coordinate
        past the image edge raises IndexError, so callers should pass only
        points that project inside the image.
    """
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)

    # Adapt a copy of P2 to the flipped image: mirror entry [0, 2] about
    # the image width and negate entry [0, 3]. The calibration object
    # itself is left untouched.
    flipped_p2 = np.copy(frame_calib.p2)
    flipped_p2[0, 2] = image_flipped.shape[1] - flipped_p2[0, 2]
    flipped_p2[0, 3] = -flipped_p2[0, 3]

    # Project with the adapted matrix
    point_in_im = calib_utils.project_to_image(points.T, p=flipped_p2).T

    # Floor to integer pixel coordinates (column 0 = x, column 1 = y)
    pixels = np.floor(point_in_im).astype(np.int32)

    # Single fancy-indexing lookup instead of one Python-level lookup per
    # point; row i of the result is the colour for points[i].
    return np.asanyarray(image_flipped)[pixels[:, 1], pixels[:, 0], :]
def test_compute_box_3d(self):
    """Check 3D box corners, visible faces and orientation of one label.

    The first object of label file 5258 is projected with the P2 matrix of
    calibration file 724513 and compared with the reference values stored
    in compute3d.mat.
    """
    # read in calib file and label file and mat file
    calib_frame = calib.read_calibration(self.test_data_calib_dir, 724513)
    objects = obj_utils.read_labels(self.test_data_label_dir, 5258)
    label_true = scipy.io.loadmat(self.test_data_dir + '/compute3d.mat')

    # compute
    corners_3d = obj_utils.compute_box_corners_3d(objects[0])
    corners, face_idx = obj_utils.project_box3d_to_image(
        corners_3d, calib_frame.p2)

    # compare data
    np.testing.assert_almost_equal(corners, label_true['corners'])

    orientation = obj_utils.compute_orientation_3d(objects[0],
                                                   calib_frame.p2)

    # -1 for index in python vs matlab
    self.assertTrue((face_idx == label_true['face_idx'] - 1).all())

    # Test orientation element-wise. Comparing `orientation.all()` with
    # `label_true['orientation'].all()` only compared two booleans and
    # passed for any pair of all-nonzero arrays.
    np.testing.assert_almost_equal(orientation, label_true['orientation'])
def get_stereo_point_cloud(img_idx, calib_dir, disp_dir):
    """ Gets the point cloud for an image calculated from the disparity map

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param disp_dir: directory with disparity images, named
        '<6-digit idx>_left_disparity.png'
    :return: the result of calib.depth_from_disparity for this image;
        presumably a (3, N) point_cloud in the form
        [[x,...][y,...][z,...]] - verify against that helper
    """
    disparity_path = disp_dir + "/%06d_left_disparity.png" % img_idx
    disp = cv2.imread(disparity_path, cv2.IMREAD_ANYDEPTH)

    # Stereo calibration derived from the P2 / P3 matrices of the frame
    frame_calib = calib.read_calibration(calib_dir, img_idx)
    stereo_info = calib.get_stereo_calibration(frame_calib.p2,
                                               frame_calib.p3)

    # Calculate the point cloud
    return calib.depth_from_disparity(disp, stereo_info)
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.

    The checkpoint name is read from the first command line argument
    (sys.argv[1]).
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    # Alternatives: ['Car'] or ['Car', 'Pedestrian', 'Cyclist']
    gt_classes = ['Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    checkpoint_name = sys.argv[1]  # e.g. 'pyramid_cars_with_aug_example'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    # exist_ok avoids the race between an existence check and the creation
    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)
        os.makedirs(prop_out_dir, exist_ok=True)
        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)
        os.makedirs(overlaid_out_dir, exist_ok=True)
        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step,
            avod_score_threshold)
        os.makedirs(pred_out_dir, exist_ok=True)
        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1,
                             dataset.num_samples,
                             avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(
                    sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            # ndmin=2 keeps a file with a single row two-dimensional, so
            # the column slicing below works for any number of rows
            proposals_and_scores = np.loadtxt(proposals_file_path, ndmin=2)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files (ndmin=2: see proposals above)
            predictions_and_scores = np.loadtxt(predictions_file_path,
                                                ndmin=2)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions
            if len(prediction_boxes_3d) > 0:
                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        # Only the size is needed; the context manager closes the file so
        # handles do not pile up over the whole dataset
        image_path = dataset.get_rgb_image_path(sample_name)
        with Image.open(image_path) as image:
            image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                    img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs,
                           calib_p2,
                           num_of_proposals,
                           proposal_objs,
                           proposal_boxes,
                           prop_2d_axes,
                           prop_3d_axes,
                           draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for box_3d in prediction_boxes_3d:
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    # A box without a valid projection is dropped
                    image_filter.append(img_box is not None)
                    if img_box is not None:
                        final_boxes_2d.append(img_box)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                # Nothing to draw for this sample.
                if draw_overlaid:
                    # The proposal figure was kept open for the overlay
                    # that is not going to happen; release it.
                    plt.close(prop_fig)
                # pred_out_dir only exists when separate prediction images
                # are requested.
                if save_empty_images and draw_predictions_separate:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 prop_2d_axes,
                                 prop_3d_axes,
                                 draw_score,
                                 draw_iou,
                                 gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs,
                                     calib_p2,
                                     num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices,
                                     final_boxes_2d,
                                     pred_2d_axes,
                                     pred_3d_axes,
                                     draw_score,
                                     draw_iou,
                                     gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs,
                                        calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices,
                                        final_boxes_2d,
                                        pred_3d_axes,
                                        draw_score,
                                        draw_iou,
                                        gt_classes,
                                        draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
def _write_empty_prediction_files(file_paths):
    """Write an empty text file at every path in file_paths."""
    for file_path in file_paths:
        np.savetxt(file_path, [])


def main():
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.

    For every checkpoint step at or above `min_step`, the proposals of each
    sample are read, filtered by score, projected to image space and written
    in KITTI label format. Samples without any valid box get an empty file.
    """

    ##############################
    # Options
    ##############################
    checkpoint_name = 'mlod_exp_example'

    # Alternative: 'val'
    data_split = 'val_half'

    # None converts every step found on disk; or give a list of steps,
    # e.g. [28000, 19000, 33000, 34000]
    global_steps = None

    score_threshold = 0.1

    save_2d = False  # Save 2D predictions
    save_3d = True   # Save 2D and 3D predictions together

    # Checkpoints below this are skipped
    min_step = 20000

    # Object Type
    obj_type = 'obj'
    ##############################
    # End of Options
    ##############################

    # Parse experiment config
    pipeline_config_file = \
        mlod.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    # Overwrite defaults
    dataset_config = config_builder_util.proto_to_obj(dataset_config)
    dataset_config.data_split = data_split
    dataset_config.aug_list = []

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Get available prediction folders
    predictions_root_dir = mlod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    proposals_root_dir = predictions_root_dir + \
        '/proposals_and_scores/' + dataset.data_split

    print('Converting proposals from', proposals_root_dir)

    if not global_steps:
        global_steps = os.listdir(proposals_root_dir)
        global_steps.sort(key=int)
        print('Checkpoints found ', global_steps)

    for global_step in global_steps:
        # Skip checkpoints below the minimum step
        if int(global_step) < min_step:
            continue

        final_predictions_dir = proposals_root_dir + \
            '/' + str(global_step)

        # 2D and 3D prediction directories
        kitti_predictions_2d_dir = predictions_root_dir + \
            '/kitti_proposals_2d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'
        kitti_proposals_3d_dir = predictions_root_dir + \
            '/kitti_proposals_3d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'

        if save_2d:
            os.makedirs(kitti_predictions_2d_dir, exist_ok=True)
        if save_3d:
            os.makedirs(kitti_proposals_3d_dir, exist_ok=True)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        print('\nGlobal step:', global_step)
        print('Converting proposals from:', final_predictions_dir)

        if save_2d:
            print('2D Detections saved to:', kitti_predictions_2d_dir)
        if save_3d:
            print('Proposals saved to:', kitti_proposals_3d_dir)

        for sample_idx in range(num_samples):
            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_proposals_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # Output files that must exist (possibly empty) for this sample
            enabled_output_paths = []
            if save_2d:
                enabled_output_paths.append(kitti_predictions_2d_file_path)
            if save_3d:
                enabled_output_paths.append(kitti_predictions_3d_file_path)

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                _write_empty_prediction_files(enabled_output_paths)
                continue

            # ndmin=2 keeps a file with a single row two-dimensional, so
            # the column indexing below works for any number of rows
            all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

            # An existing but empty file holds no predictions either
            if all_predictions.size == 0:
                _write_empty_prediction_files(enabled_output_paths)
                continue

            # Swap l, w for predictions where w > l and set the rotation
            # of the swapped boxes to pi / 2
            swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
            fixed_predictions = np.copy(all_predictions)
            fixed_predictions[swapped_indices, 3] = \
                all_predictions[swapped_indices, 4]
            fixed_predictions[swapped_indices, 4] = \
                all_predictions[swapped_indices, 3]
            fixed_predictions[swapped_indices, 6] = np.pi / 2

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = fixed_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                _write_empty_prediction_files(enabled_output_paths)
                continue

            # Project to image space
            img_idx = int(sample_name)

            # Load image for truncation. Only the size is needed; the
            # context manager closes the file so handles do not pile up.
            with Image.open(dataset.get_rgb_image_path(sample_name)) \
                    as image:
                image_size = image.size

            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            boxes = []
            image_filter = []
            for prediction in all_predictions:
                box_3d = prediction[0:7]
                img_box = box_3d_projector.project_to_image_space(
                    box_3d, stereo_calib_p2,
                    truncate=True, image_size=image_size,
                    discard_before_truncation=False)

                # Skip invalid boxes (outside image space)
                image_filter.append(img_box is not None)
                if img_box is not None:
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                _write_empty_prediction_files(enabled_output_paths)
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of
            # zeros (N, 16) is allocated but only values [3:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Truncation and Occlusion are always empty (see below)

            # Alpha (Not computed)
            kitti_predictions[:, 3] = -10

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # Box columns 5, 4, 3 are written in that order; with box
            # columns 3 and 4 being l and w (see the swap above) this is
            # presumably the KITTI (h, w, l) order - verify column 5 is h
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            num_rows = len(kitti_predictions)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones((num_rows, 2), dtype=np.int32)
            # Placeholders for the 2D-only file. They are stacked right
            # after the 2D box, i.e. in the columns where the 3D file has
            # the three dimension values (-1), the (x, y, z) location
            # (-1000) and ry (-10).
            kitti_empty_2 = -1 * np.ones((num_rows, 3), dtype=np.int32)
            kitti_empty_3 = -1000 * np.ones((num_rows, 3), dtype=np.int32)
            kitti_empty_4 = -10 * np.ones((num_rows, 1), dtype=np.int32)

            # Create Type Array
            obj_types = [obj_type] * num_rows

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([
                obj_types, kitti_empty_1, kitti_predictions[:, 3:8],
                kitti_empty_2, kitti_empty_3, kitti_empty_4,
                kitti_predictions[:, 15]
            ])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack(
                [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path, kitti_text_2d,
                           newline='\r\n', fmt='%s')
            if save_3d:
                np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                           newline='\r\n', fmt='%s')

        print('\nNum valid:', num_valid_samples)
        print('Num samples:', num_samples)
def load_samples(self, indices):
    """Load the input/output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Returns:
        samples: a list of data sample dicts. Two early exits exist:
            a single-element list holding only the sample name and the
            (empty) anchors info when training on a sample without
            anchors info, and an empty list when no Mask R-CNN result
            is available for a sample.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None

            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Load MRCNN mask and features
        mrcnn_result = self.kitti_utils.get_mrcnn_result(img_idx)
        # If no pedestrian can be seen on the images, break
        if not mrcnn_result:
            print('+++++++++++++ No mrcnn_result. '
                  'load_samples, early end ++++++++++++++++')
            return []

        # Unwrap the stored result once instead of once per key
        mrcnn_data = mrcnn_result.item()
        image_mrcnn_feature_input = mrcnn_data.get('features')
        # rois: [batch, N, (y1, x1, y2, x2)] detection bounding boxes
        image_mrcnn_bbox_input = mrcnn_data.get('rois')
        image_mask_input = mrcnn_data.get('masks')

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(self.bev_source,
                                                       img_idx,
                                                       image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # Without labels there is nothing to flip; iterating over
            # None would raise a TypeError.
            if obj_labels is not None:
                obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                              for obj in obj_labels]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray(
                [box_3d_encoder.object_label_to_box_3d(obj_label)
                 for obj_label in obj_labels])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_IMAGE_MASK_INPUT: image_mask_input,
            constants.KEY_IMAGE_MRCNN_FEATURE_INPUT:
                image_mrcnn_feature_input,
            constants.KEY_IMAGE_MRCNN_BBOX_INPUT: image_mrcnn_bbox_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def save_predictions_in_kitti_format(model,
                                     checkpoint_name,
                                     data_split,
                                     score_threshold,
                                     global_step):
    """Converts a set of network predictions into text files required
    for KITTI evaluation.

    One text file per sample is written to the 3D KITTI predictions
    directory of the checkpoint; samples without any usable prediction
    get an empty file.

    Args:
        model: object whose ``dataset`` attribute provides the sample
            names, data split, classes, image paths and calibration dir
        checkpoint_name: name of the checkpoint folder under
            ``data/outputs``
        data_split: not read by this function; the split is taken from
            ``model.dataset.data_split``
        score_threshold: predictions scoring below this are dropped
        global_step: checkpoint step whose predictions are converted
    """
    dataset = model.dataset

    # Round this because protobuf encodes default values as full decimal
    score_threshold = round(score_threshold, 3)

    # Get available prediction folders
    predictions_root_dir = avod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    final_predictions_dir = final_predictions_root_dir + \
        '/' + str(global_step)

    # 3D prediction directories
    kitti_predictions_3d_dir = predictions_root_dir + \
        '/kitti_predictions_3d/' + \
        dataset.data_split + '/' + \
        str(score_threshold) + '/' + \
        str(global_step) + '/data'

    os.makedirs(kitti_predictions_3d_dir, exist_ok=True)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', final_predictions_dir)

    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(
            sample_idx + 1, num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]

        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
            '/' + prediction_file

        predictions_file_path = final_predictions_dir + \
            '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

        score_filter = all_predictions[:, 7] >= score_threshold
        all_predictions = all_predictions[score_filter]

        # If no predictions, skip to next file
        if len(all_predictions) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # Project to image space
        sample_name = prediction_file.split('.')[0]
        img_idx = int(sample_name)

        # Only the image size is needed (for truncation); close the file
        # right away instead of leaking one handle per sample.
        with Image.open(dataset.get_rgb_image_path(sample_name)) as image:
            image_size = image.size

        stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       img_idx).p2

        boxes = []
        image_filter = []
        for box_3d in all_predictions[:, 0:7]:
            img_box = box_3d_projector.project_to_image_space(
                box_3d, stereo_calib_p2,
                truncate=True, image_size=image_size)

            # Skip invalid boxes (outside image space)
            if img_box is None:
                image_filter.append(False)
                continue

            image_filter.append(True)
            boxes.append(img_box)

        boxes = np.asarray(boxes)
        all_predictions = all_predictions[image_filter]

        # If no predictions, skip to next file
        if len(boxes) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [4:16] are used
        kitti_predictions = np.zeros([len(boxes), 16])

        # Get object types
        all_pred_classes = all_predictions[:, 8].astype(np.int32)
        obj_types = [dataset.classes[class_idx]
                     for class_idx in all_pred_classes]

        # Truncation and Occlusion are always empty (see below)

        # Alpha (Not computed)
        kitti_predictions[:, 3] = -10

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes[:, 0:4]

        # 3D predictions
        # (l, w, h)
        kitti_predictions[:, 8] = all_predictions[:, 5]
        kitti_predictions[:, 9] = all_predictions[:, 4]
        kitti_predictions[:, 10] = all_predictions[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                     dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack([obj_types,
                                         kitti_empty_1,
                                         kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                   newline='\r\n', fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)
def load_samples(self, indices):
    """Load the input/output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Returns:
        samples: a list of data sample dicts. When training on a sample
            without anchors info (and train_on_all_samples is off), a
            single-element list holding only the sample name and the
            empty anchors info is returned immediately.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None

            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib = calib_utils.read_calibration(
            self.calib_dir, int(sample_name))
        stereo_calib_p2 = stereo_calib.p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        # WZN: the flipping augmentation flips both image (in camera
        # frame), pointcloud (in Lidar frame), and calibration matrix
        # (between cam and Lidar) so the correspondence is still true.
        # NOTE(review): only stereo_calib_p2 is flipped here; the
        # stereo_calib object handed to the sparse pooling helpers below
        # is left untouched - confirm those helpers do not need the
        # flipped projection.
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # Without labels there is nothing to flip; iterating over
            # None would raise a TypeError.
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane,
            output_indices=self.output_indices)

        # WZN: produce input for sparse pooling. With output_indices set,
        # create_bev_maps returns (bev maps, voxel indices, points in
        # voxel) instead of only the bev maps.
        if self.output_indices:
            voxel_indices = bev_images[1]
            pts_in_voxel = bev_images[2]
            bev_images = bev_images[0]

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        # WZN: produce input for sparse pooling
        if self.output_indices:
            image_size_xy = [image_shape[1], image_shape[0]]

            sparse_pooling_input1 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    image_size_xy, bev_input.shape[0:2]),
                stride=[1, 1])

            # WZN: Note here avod padded the vgg input by 4, so add it
            bev_input_padded = np.copy(bev_input.shape[0:2])
            bev_input_padded[0] = bev_input_padded[0] + 4
            sparse_pooling_input2 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    image_size_xy, bev_input_padded),
                stride=[8, 8])

            sparse_pooling_input = [
                sparse_pooling_input1, sparse_pooling_input2
            ]
        else:
            sparse_pooling_input = None

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            # WZN: for sparse pooling
            constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def preprocess(self, indices):
    """Preprocesses anchor info and saves info to files

    For every requested sample the anchors of all classes are generated,
    the anchor info (including image-space boxes) is calculated against
    the filtered ground truth, and the result is saved. Samples without
    ground truth for the dataset classes get an empty file.

    Args:
        indices (int array): sample indices to process.
            If None, processes all samples
    """
    # Get anchor stride for class
    anchor_strides = self._anchor_strides

    dataset = self._dataset
    dataset_utils = self._dataset.kitti_utils
    classes_name = dataset.classes_name

    # Make folder if it doesn't exist yet
    output_dir, img_roi_dir, img_roi_norm_dir = \
        self.mini_batch_utils.get_file_path(
            classes_name, anchor_strides, sample_name=None)
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(img_roi_dir, exist_ok=True)
    os.makedirs(img_roi_norm_dir, exist_ok=True)

    # Get clusters for class
    all_clusters_sizes, _ = dataset.get_cluster_info()

    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Load indices of data_split
    all_samples = dataset.sample_list

    if indices is None:
        indices = np.arange(len(all_samples))
    num_samples = len(indices)

    # Skipping of already preprocessed samples was switched off for
    # debugging (modification by benz); every sample is regenerated.
    skip_existing = False

    # For each image in the dataset, save info on the anchors
    for sample_idx in indices:
        # Get image name for given cluster
        sample_name = all_samples[sample_idx].name
        img_idx = int(sample_name)

        # Check for existing files and skip to the next
        if skip_existing and self._check_for_existing(
                classes_name, anchor_strides, sample_name):
            print("{} / {}: Sample already preprocessed".format(
                sample_idx + 1, num_samples, sample_name))
            continue

        # Get ground truth and filter based on difficulty
        ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                  img_idx)

        # Filter objects to dataset classes
        filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
        filtered_gt_list = np.asarray(filtered_gt_list)

        # Filtering by class has no valid ground truth, skip this image
        if len(filtered_gt_list) == 0:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                dataset.planes_dir)

        # Only the size is needed; close the image file right away
        # instead of leaking one handle per sample.
        with Image.open(dataset.get_rgb_image_path(sample_name)) as image:
            image_shape = [image.size[1], image.size[0]]

        # benz: obtain the calibration info
        stereo_calib_p2 = calib_utils.read_calibration(
            dataset.calib_dir, img_idx).p2

        # Generate sliced 2D voxel grid for filtering
        vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
            sample_name,
            source=dataset.bev_source,
            image_shape=image_shape)

        # List for merging all anchors
        all_anchor_boxes_3d = []

        # Create anchors for each class
        for class_idx in range(len(dataset.classes)):
            # Generate anchors for all classes
            grid_anchor_boxes_3d = anchor_generator.generate(
                area_3d=self._area_extents,
                anchor_3d_sizes=all_clusters_sizes[class_idx],
                anchor_stride=self._anchor_strides[class_idx],
                ground_plane=ground_plane)

            all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

        # Filter empty anchors
        all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
        # anchors are Nx6 [x,y,z,dx,dy,dz]
        anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
        empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors, vx_grid_2d, self._density_threshold)

        # Calculate anchor info
        # benz: we calculate the image IoU info
        anchors_info, img_boxes_anchors, img_boxes_anchors_norm = \
            self._calculate_img_anchors_info(
                all_anchor_boxes_3d, empty_anchor_filter,
                filtered_gt_list, stereo_calib_p2, image_shape)

        anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

        valid_iou_indices = np.where(anchor_ious > 0.0)[0]

        print("{} / {}:"
              "{:>6} anchors, "
              "{:>6} iou > 0.0, "
              "for {:>3} {}(s) for sample {}".format(
                  sample_idx + 1, num_samples,
                  len(anchors_info),
                  len(valid_iou_indices),
                  len(filtered_gt_list), classes_name, sample_name))

        # Save anchors info
        self._save_to_file(classes_name, anchor_strides, sample_name,
                           anchors_info, img_boxes_anchors,
                           img_boxes_anchors_norm)
def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    # The returned clusters are not used below (fake clusters are used
    # instead); the call is kept in case it has side effects - TODO
    # confirm whether it can be dropped.
    label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    # Close the image file once its shape has been read
    with Image.open(image_path) as image:
        image_shape = np.array(image).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Overlay boxes on images. The 3D boxes are drawn with the same
    # calibration matrix that produced the 2D projections (previously the
    # calibration of frame 0 was re-read here, which did not match the
    # displayed frame).
    for anchor_idx, anchor_box_3d in enumerate(anchor_boxes_3d):
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, stereo_calib_p2)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        # Redraw only every 32 anchors to keep the display responsive
        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
def convertPredictionsToKitti(dataset, predictions_root_dir, additional_cls):
    """Converts a set of network predictions into text files required
    for KITTI evaluation.

    Args:
        dataset: object providing ``data_split``, ``num_samples``,
            ``sample_names``, ``classes``, ``calib_dir`` and
            ``get_rgb_image_path``
        predictions_root_dir: directory containing the
            ``final_predictions_and_scores`` folder; with the current
            options the KITTI text files are written directly into it
        additional_cls: when truthy, the 3D output files are opened in
            append mode so that detections already written (e.g. for
            another class) are kept; otherwise they are overwritten

    After the conversion every sub-directory of ``predictions_root_dir``
    is removed (best effort; failures are printed and logged).
    """
    open_mode = 'w+'
    if additional_cls:
        open_mode = 'a+'

    ##############################
    # Options
    ##############################
    global_steps = None
    save_to_base = True
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.01

    save_2d = False  # Save 2D predictions
    save_3d = True  # Save 2D and 3D predictions together
    save_alphas = True  # Save alphas (observation angles)

    # Checkpoints below this are skipped
    min_step = 20000

    ##############################
    # End of Options
    ##############################

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    logging.info('Converting detections from %s',
                 final_predictions_root_dir)

    if not global_steps:
        global_steps = os.listdir(final_predictions_root_dir)
        global_steps.sort(key=int)
        logging.debug('Checkpoints found %s', global_steps)

    for global_step in global_steps:

        # Skip first checkpoint
        if int(global_step) < min_step:
            continue

        final_predictions_dir = final_predictions_root_dir + \
            '/' + str(global_step)

        if save_to_base:
            kitti_predictions_2d_dir = predictions_root_dir
            kitti_predictions_3d_dir = predictions_root_dir
        else:
            # 2D and 3D prediction directories
            kitti_predictions_2d_dir = predictions_root_dir + \
                '/kitti_predictions_2d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'
            kitti_predictions_3d_dir = predictions_root_dir + \
                '/kitti_predictions_3d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'

            if save_2d:
                os.makedirs(kitti_predictions_2d_dir, exist_ok=True)
            if save_3d:
                os.makedirs(kitti_predictions_3d_dir, exist_ok=True)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        logging.info('\nGlobal step: %d', int(global_step))
        logging.info('Converting detections from: %s',
                     final_predictions_dir)

        if save_2d:
            logging.info('2D Detections saved to: %s',
                         kitti_predictions_2d_dir)
        if save_3d:
            logging.info('3D Detections saved to: %s',
                         kitti_predictions_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                _write_empty_kitti_outputs(
                    kitti_predictions_2d_file_path,
                    kitti_predictions_3d_file_path,
                    save_2d, save_3d, open_mode)
                continue

            all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = all_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                _write_empty_kitti_outputs(
                    kitti_predictions_2d_file_path,
                    kitti_predictions_3d_file_path,
                    save_2d, save_3d, open_mode)
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Only the image size is needed (for truncation); close the
            # file right away instead of leaking one handle per sample.
            with Image.open(
                    dataset.get_rgb_image_path(sample_name)) as image:
                image_size = image.size

            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            boxes = []
            image_filter = []
            for box_3d in all_predictions[:, 0:7]:
                img_box = box_3d_projector.project_to_image_space(
                    box_3d,
                    stereo_calib_p2,
                    truncate=True,
                    image_size=image_size)

                # Skip invalid boxes (outside image space)
                if img_box is None:
                    image_filter.append(False)
                else:
                    image_filter.append(True)
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                _write_empty_kitti_outputs(
                    kitti_predictions_2d_file_path,
                    kitti_predictions_3d_file_path,
                    save_2d, save_3d, open_mode)
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of
            # zeros (N, 16) is allocated but only values [4:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Get object types
            all_pred_classes = all_predictions[:, 8].astype(np.int32)
            obj_types = [
                dataset.classes[class_idx]
                for class_idx in all_pred_classes
            ]

            # Truncation and Occlusion are always empty (see below)

            # Alpha
            if not save_alphas:
                kitti_predictions[:, 3] = -10
            else:
                # Observation angle: ry minus the viewing angle of the
                # box centre, arctan2(x, z)
                alphas = all_predictions[:, 6] - \
                    np.arctan2(all_predictions[:, 0],
                               all_predictions[:, 2])
                kitti_predictions[:, 3] = alphas

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # (l, w, h)
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones(
                (len(kitti_predictions), 2), dtype=np.int32)
            # Empty 3D (x, y, z)
            kitti_empty_2 = -1 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (h, w, l)
            kitti_empty_3 = -1000 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (ry)
            kitti_empty_4 = -10 * np.ones(
                (len(kitti_predictions), 1), dtype=np.int32)

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([
                obj_types, kitti_empty_1, kitti_predictions[:, 3:8],
                kitti_empty_2, kitti_empty_3, kitti_empty_4,
                kitti_predictions[:, 15]
            ])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack(
                [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path,
                           kitti_text_2d,
                           newline='\r\n',
                           fmt='%s')
            if save_3d:
                with open(kitti_predictions_3d_file_path, open_mode) as f:
                    np.savetxt(f, kitti_text_3d, newline='\r\n', fmt='%s')

        logging.debug('\nNum valid: %d', num_valid_samples)
        logging.debug('Num samples: %d', num_samples)

    # Clean up: remove every sub-directory of the predictions root.
    # Best effort - a failure is reported but does not abort the run.
    for the_file in os.listdir(predictions_root_dir):
        file_path = os.path.join(predictions_root_dir, the_file)
        try:
            if os.path.isdir(file_path):
                shutil.rmtree(file_path)
                logging.debug("Removing folder: %s", file_path)
        except Exception as e:
            print(e)
            logging.exception(e)


def _write_empty_kitti_outputs(path_2d, path_3d, save_2d, save_3d,
                               open_mode):
    """Make sure the output files of a sample without detections exist.

    The 3D file is opened with ``open_mode`` so that, in append mode,
    detections written earlier are not truncated away.
    """
    if save_2d:
        np.savetxt(path_2d, [])
    if save_3d:
        with open(path_3d, open_mode):
            pass
def load_calib(calib_dir, idx, fname=None):
    """Read a calibration via ``calib_utils.read_calibration``.

    Args:
        calib_dir: forwarded as the first argument
        idx: forwarded as the second argument
        fname: forwarded as the third argument (default None)

    Returns:
        Whatever ``calib_utils.read_calibration`` returns for the given
        arguments.
    """
    return calib_utils.read_calibration(calib_dir, idx, fname)
def load_samples(self, indices):
    """Loads input-output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts. If a labelled training sample
            has no anchors info (and train_on_all_samples is off), the
            function returns immediately with a single-element list that
            holds only the sample name and the empty anchors info; samples
            collected before that point are not included.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            # No labels: use zero-filled placeholders for every label field
            obj_labels = None
            anchors_info = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_boxes_2d = np.zeros((1, 4))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Hard-coded mode switches: with lidar_only off, the camera image
        # and the calibration file are used.
        lidar_only = False
        num_views = 1

        if not lidar_only:
            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]

            # Append the depth channel
            if self.add_depth:
                depth_map = obj_utils.get_depth_map(img_idx, self.depth_dir)

                # Set invalid pixels to max depth
                depth_map[np.asarray(depth_map == 0.0)] = \
                    self.kitti_utils.bev_extents[1, 1]

                # Add channel dimension to make stacking easier
                depth_map = np.expand_dims(depth_map, 2)
                image_input = np.concatenate([rgb_image, depth_map], axis=2)
            else:
                image_input = rgb_image
        else:
            image_shape = (370, 1224)

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        if lidar_only:
            # Hard-coded 3x4 projection matrices, one per view.
            p_matrix = np.zeros((num_views, 3, 4), dtype=float)
            if num_views > 0:
                p_matrix[0] = np.array([
                    [8.39713500e+02, 3.58853400e+01,
                     4.48566750e+02, 2.31460650e+03],
                    [1.02835238e-13, 8.54979440e+02,
                     1.57320433e+02, 2.49655872e+03],
                    [0.00000000e+00, 7.97452000e-02,
                     9.96815000e-01, 5.14357000e+00],
                ])
            # Only fill the second view when it has been allocated; writing
            # p_matrix[1] with num_views == 1 raises IndexError.
            if num_views > 1:
                p_matrix[1] = np.array([
                    [1.20171708e+03, 9.73326000e+01,
                     3.99933320e+02, 1.04945816e+04],
                    [1.41054657e+01, 8.65088160e+02,
                     8.46334690e+01, 5.24229862e+03],
                    [1.62221000e-01, 1.62221000e-01,
                     9.73329000e-01, 1.13555000e+01],
                ])
        else:
            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            if not lidar_only:
                image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None for unlabeled splits; nothing to flip then
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label(obj, image_shape)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            if lidar_only:
                for i in range(num_views):
                    p_matrix[i] = kitti_aug.flip_stereo_calib_p2(
                        p_matrix[i], image_shape)
            else:
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if (kitti_aug.AUG_PCA_JITTER in sample.augs) and not lidar_only:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3], aug_img_noise=self.aug_img_noise)

        # Augmentation (Random Occlusion)
        # NOTE(review): stereo_calib_p2 is only bound when lidar_only is
        # False; enabling lidar_only together with this augmentation would
        # need a projection matrix chosen here - confirm which one.
        if kitti_aug.AUG_RANDOM_OCC in sample.augs:
            point_cloud = kitti_aug.occ_aug(point_cloud, stereo_calib_p2,
                                            obj_labels)

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])
            label_boxes_2d = np.asarray([
                box_3d_encoder.object_label_to_box_2d(obj_label)
                for obj_label in obj_labels
            ])
            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                    label_boxes_2d = np.asarray([[-1.0, -1.0, -1.0, -1.0]])
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                    label_boxes_2d = np.zeros((1, 4))
                    label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)
        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        # Slice-filter the point cloud and transpose it before projecting
        # depths and storing it in the sample dict.
        point_cloud = self.kitti_utils._apply_slice_filter(
            point_cloud, ground_plane).T

        if lidar_only:
            # One projected depth image per view; it also serves as the
            # image input (with a trailing channel axis).
            depth_map = np.zeros(
                (num_views, image_shape[0], image_shape[1]), dtype=float)
            for i in range(num_views):
                depth_map[i, :, :] = project_depths(
                    point_cloud, p_matrix[i], image_shape[0:2])
            depth_map_expand_dims = np.expand_dims(depth_map, axis=-1)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: depth_map_expand_dims,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: p_matrix[0:num_views],
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs,
                constants.KEY_DPT_INPUT: depth_map
            }
        else:
            depth_map = project_depths(point_cloud, stereo_calib_p2,
                                       image_shape[0:2])
            # Add a leading axis to the single projected depth image
            depth_map = np.expand_dims(depth_map, axis=0)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs,
                constants.KEY_DPT_INPUT: depth_map
            }

        sample_dicts.append(sample_dict)

    return sample_dicts
def load_samples(self, indices):
    """Loads input-output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts. If a labelled training sample
            has no anchors info (and train_on_all_samples is off), the
            function returns immediately with a single-element list that
            holds only the sample name, the empty anchors info and the
            image ROI data; samples collected before that point are not
            included.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)
            img_roi_all = self.get_img_roi_data(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_IMG_ROI_ALL: img_roi_all
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            # No labels: bind every label-derived field to a placeholder so
            # the sample dict below can always be built.
            obj_labels = None
            anchors_info = []
            img_roi_all = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_boxes_2d = np.zeros((1, 4))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB); decoded images have their channels
        # stored in B G R order.
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None for unlabeled splits; nothing to flip then
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

            # Flip the image ROIs (element 0) and the normalised ROIs
            # (element 1); skipped when no ROI data is available.
            if img_roi_all is not None and len(img_roi_all) >= 2:
                img_roi_all = (
                    kitti_aug.flip_roi(img_roi_all[0], image_shape),
                    kitti_aug.flip_roi_norm(img_roi_all[1]),
                )

            if anchors_info:
                anchor_indices, anchors_ious, anchor_offsets, \
                    anchor_classes = anchors_info
                # Negate the first offset column to match the flip
                anchor_offsets[:, 0] = -anchor_offsets[:, 0]
                # Repack into anchors_info (the original mistakenly rebound
                # anchor_offsets here).
                anchors_info = (anchor_indices, anchors_ious,
                                anchor_offsets, anchor_classes)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])
            label_boxes_2d = np.asarray([
                box_2d_encoder.object_label_to_box_2d(obj_label)
                for obj_label in obj_labels
            ])

            # The labels were flipped in 3D only, so the 2D boxes are
            # flipped separately here.
            if kitti_aug.AUG_FLIPPING in sample.augs:
                label_boxes_2d = kitti_aug.flip_roi(
                    label_boxes_2d, image_shape)

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                img_roi_all = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)
        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_ANCHORS_INFO: anchors_info,
            constants.KEY_IMG_ROI_ALL: img_roi_all,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def main():
    """Load one fixed sample, flip it, and flip its mini-batch anchors.

    Builds the dataset from the ``rpn_cars`` mini-batch preprocessing config,
    loads image, ground plane, point cloud, labels and calibration for sample
    ``000169``, applies the flip augmentation to each of them, gathers the
    anchors referenced by the sample's anchors info, flips those too, and
    prints how long each flipping stage took. Returns early (after printing a
    hint) when no anchors were gathered.
    """
    # Create Dataset
    config_path = mlod.root_dir() + \
        '/configs/mb_preprocessing/rpn_cars.config'
    dataset = DatasetBuilder.load_dataset_from_config(config_path)

    # Fixed sample used by this demo
    sample_name = '000169'
    anchor_strides = dataset.kitti_utils.anchor_strides
    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    # Stored as [shape[1], shape[0]], i.e. swapped relative to image.shape
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
    point_cloud = obj_utils.get_depth_map_point_cloud(
        img_idx, dataset.calib_dir, dataset.depth_dir, image_shape)

    # Grab ground truth, keeping only the labels the dataset filter accepts
    ground_truth_list = dataset_utils.filter_labels(
        obj_utils.read_labels(dataset.label_dir, img_idx))

    frame_calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
    stereo_calib_p2 = frame_calib.p2

    ##############################
    # Flip sample info
    ##############################
    flip_timer = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [
        kitti_aug.flip_label_in_3d_only(label) for label in ground_truth_list
    ]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(
        stereo_calib_p2, image_shape)
    flipped_points = flipped_point_cloud.T

    print('flip sample', time.time() - flip_timer)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(
        dataset.classes_name, anchor_strides, sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    for class_idx, _ in enumerate(dataset.classes):
        class_anchors = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if not anchors_info:
            continue

        # Keep only the anchors referenced by the mini batch indices
        indices, ious, _, _ = anchors_info
        all_anchor_boxes_3d.extend(class_anchors[indices])
        all_ious.extend(ious)

    if not all_anchor_boxes_3d:
        # Exit early if anchors_info is empty
        print("No anchors, Please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)

    ##############################
    # Flip anchors
    ##############################
    flip_timer = time.time()
    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)
    print('flip anchors', time.time() - flip_timer)

    # Overwrite with flipped things
    all_anchor_boxes_3d, points, ground_truth_list, ground_plane = (
        flipped_anchor_boxes_3d,
        flipped_points,
        flipped_gt_list,
        flipped_ground_plane,
    )
def run_checkpoint_once(self, checkpoint_to_restore):
    """Evaluates network metrics once over all the validation samples.

    Restores the checkpoint, then feeds samples until the dataset reports a
    completed epoch. For every sample the predicted boxes and scores are
    written to ``<pred_dir>/final_predictions_and_scores/<data_split>/
    <global_step>/<sample_name>.txt``. In validation mode ('val') losses and
    summaries are also computed and accumulated; otherwise (test mode) only
    feed-dict and inference times are recorded and reported.

    Args:
        checkpoint_to_restore: The directory of the checkpoint to restore.
    """
    self._saver.restore(self._sess, checkpoint_to_restore)

    data_split = self.dataset_config.data_split
    predictions_base_dir = self.paths_config.pred_dir

    num_samples = self.model.dataset.num_samples
    train_val_test = self.model._train_val_test
    print('model: train_val_test: ', train_val_test)

    validation = train_val_test == 'val'

    global_step = trainer_utils.get_global_step(self._sess,
                                                self.global_step_tensor)

    # Average losses dictionary (only needed when losses are evaluated)
    if validation:
        sum_losses = self._create_losses_dict()

    # Per-checkpoint output directory for the prediction text files
    predictions_dir = predictions_base_dir + \
        "/final_predictions_and_scores/{}/{}".format(
            data_split, global_step)
    trainer_utils.create_dir(predictions_dir)

    num_valid_samples = 0

    # Keep track of feed_dict and inference time
    total_feed_dict_time = []
    total_inference_time = []

    # Run through a single epoch
    current_epoch = self.model.dataset.epochs_completed
    while current_epoch == self.model.dataset.epochs_completed:
        # Keep track of feed_dict speed
        start_time = time.time()
        feed_dict = self.model.create_feed_dict()
        feed_dict_time = time.time() - start_time

        # Get sample name from model
        sample_name = self.model.sample_info['sample_name']
        stereo_calib = calib_utils.read_calibration(
            self.model.dataset.calib_dir, int(sample_name))
        stereo_calib_p2 = stereo_calib.p2

        output_file_path = predictions_dir + \
            "/{}.txt".format(sample_name)

        num_valid_samples += 1
        print("Step {}: {} / {}, Inference on sample {}".format(
            global_step, num_valid_samples, num_samples, sample_name))

        # Do predictions, loss calculations, and summaries
        if validation:
            if self.summary_merged is not None:
                predictions, eval_losses, eval_total_loss, summary_out = \
                    self._sess.run([self._prediction_dict,
                                    self._loss_dict,
                                    self._total_loss,
                                    self.summary_merged],
                                   feed_dict=feed_dict)

                # The second sample of a two-sample run is logged to the
                # secondary summary writer; everything else to the main one.
                if num_valid_samples == 2 and num_samples == 2:
                    self.summary_writer2.add_summary(
                        summary_out, global_step)
                else:
                    self.summary_writer.add_summary(
                        summary_out, global_step)
            else:
                print('start inference without smry:')
                predictions, eval_losses, eval_total_loss = \
                    self._sess.run([self._prediction_dict,
                                    self._loss_dict,
                                    self._total_loss],
                                   feed_dict=feed_dict)

            self._update_losses(eval_losses, eval_total_loss,
                                sum_losses, global_step)

            # Save predictions
            print('save predictions')
            predictions_and_scores = \
                self.get_predicted_boxes_3d_and_scores(predictions,
                                                       stereo_calib_p2)
            np.savetxt(output_file_path, predictions_and_scores,
                       fmt='%.5f')

            # Calculate accuracies
            # Unnecessary because there is only one class.. object class
            # without bkg class..
            self.get_cls_accuracy(predictions, sum_losses, global_step)

            print("Step {}: Total time {} s".format(
                global_step, time.time() - start_time))
        else:
            # Test mode --> train_val_test == 'test'
            inference_start_time = time.time()
            # Don't calculate loss or run summaries for test
            predictions = self._sess.run(self._prediction_dict,
                                         feed_dict=feed_dict)
            inference_time = time.time() - inference_start_time

            # Add times to list
            total_feed_dict_time.append(feed_dict_time)
            total_inference_time.append(inference_time)

            predictions_and_scores = \
                self.get_predicted_boxes_3d_and_scores(predictions,
                                                       stereo_calib_p2)
            # Write to this sample's output file (the previous code used an
            # undefined name here and raised NameError in test mode).
            np.savetxt(output_file_path, predictions_and_scores,
                       fmt='%.5f')

    # end while current_epoch == model.dataset.epochs_completed
    if validation:
        # Store the accumulated prediction losses and results
        self.save_prediction_losses_results(sum_losses, num_valid_samples,
                                            global_step,
                                            predictions_base_dir)
        if self.do_kitti_native_eval:
            # Kitti native evaluation is currently disabled
            pass
    else:
        # Test mode --> train_val_test == 'test'
        evaluator_utils.print_inference_time_statistics(
            total_feed_dict_time, total_inference_time)

    print("Step {}: Finished evaluation, results saved to {}".format(
        global_step, predictions_dir))
def main():
    """Draw the ground-truth labels of every validation sample and save PNGs.

    Reads the sample indices listed in ``val.txt`` under the KITTI object
    data directory. For each sample it loads the ground-truth labels and the
    stereo calibration, draws the ground truth on the figure axes returned by
    ``vis_utils.visualization`` and saves the figure as
    ``images_2d/<index>.png``. A running ``i/total`` counter is printed after
    every sample.
    """
    gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Output images directory
    output_dir_base = 'images_2d'

    data_dir = '../../DATA/Kitti/object/'
    label_dir = data_dir + 'training/label_2'
    image_dir = data_dir + 'training/image_2'
    filepath = data_dir + 'val.txt'
    calib_dir = data_dir + 'training/calib'

    # Read the sample indices; the file is closed as soon as it is parsed
    # and blank lines (e.g. a trailing newline) are ignored.
    with open(filepath, 'r') as split_file:
        filenames = [int(line) for line in split_file if line.strip()]

    i_max = len(filenames)
    for i, filename in enumerate(filenames, start=1):
        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        gt_objects = obj_utils.read_labels(label_dir, filename)

        # NOTE(review): the returned boxes are not used below; the call is
        # kept in case the helper has side effects - confirm and drop.
        obj_utils.build_bbs_from_objects(gt_objects,
                                         class_needed=gt_classes)

        prop_fig, prop_2d_axes, prop_3d_axes = \
            vis_utils.visualization(image_dir, filename, display=False)

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(calib_dir, filename)
        calib_p2 = stereo_calib.p2

        draw_gt(gt_objects, prop_2d_axes, prop_3d_axes, calib_p2)

        out_name = output_dir_base + "/%06d.png" % filename
        plt.savefig(out_name)
        plt.close(prop_fig)

        print(str(i) + '/' + str(i_max))

    print('\nDone')