def get_lidar_point_cloud(img_idx, calib_dir, velo_dir,
                          im_size=None, min_intensity=None):
    """Load a lidar scan in the camera frame, optionally cropped to the image.

    The scan is read with ``calib_utils.read_lidar`` and transformed with
    ``calib_utils.lidar_to_cam_frame``. When ``im_size`` is given, points
    with non-positive camera-frame z are dropped, the remaining points are
    projected with the ``p2`` matrix, and only points landing strictly
    inside the image are kept. ``min_intensity`` is applied on top of the
    image crop; it is ignored when ``im_size`` is not given.

    :param img_idx: image index
    :param calib_dir: directory with calibration files
    :param velo_dir: directory with velodyne files
    :param im_size: (optional) 2 x 1 list containing the size of the image
        to filter the point cloud [w, h]
    :param min_intensity: (optional) minimum intensity required to keep a
        point; a falsy value (None or 0) disables the intensity filter

    :return: (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """
    # Read calibration info and the raw scan
    frame_calib = calib_utils.read_calibration(calib_dir, img_idx)
    x, y, z, intensity = calib_utils.read_lidar(velo_dir=velo_dir,
                                                img_idx=img_idx)

    # Stack into (N, 3) and move to the camera frame
    pts = np.vstack((x, y, z)).T
    pts = calib_utils.lidar_to_cam_frame(pts, frame_calib)

    # Without an image size there is nothing to crop against.
    # (len() instead of truthiness so that ndarray sizes are accepted too.)
    if im_size is None or len(im_size) == 0:
        return pts.T

    # Only keep points in front of camera (positive z)
    in_front = pts[:, 2] > 0
    pts = pts[in_front]

    # Project to image frame
    point_in_im = calib_utils.project_to_image(pts.T, p=frame_calib.p2).T

    # Filter based on the given image size
    image_filter = (point_in_im[:, 0] > 0) & \
                   (point_in_im[:, 0] < im_size[0]) & \
                   (point_in_im[:, 1] > 0) & \
                   (point_in_im[:, 1] < im_size[1])

    if not min_intensity:
        return pts[image_filter].T

    # The intensities must go through the same "in front" mask as the
    # points, otherwise the two boolean masks have different lengths.
    # NOTE(review): assumes lidar_to_cam_frame keeps the point order and
    # count of its input - confirm against calib_utils.
    intensity_filter = np.asarray(intensity)[in_front] > min_intensity
    point_filter = np.logical_and(image_filter, intensity_filter)
    return pts[point_filter].T
def main():
    """Verify that flipped 3D labels project onto the mirrored 2D boxes.

    Walks over every sample of the trainval set. For each 'Car', 'Van',
    'Pedestrian' and 'Cyclist' object, the 3D box is projected with the
    original p2 matrix and mirrored in 2D; the label is then flipped in 3D
    and projected with a flipped p2 matrix. The per-coordinate pixel
    differences between the two results are collected, large ones are
    printed, and histograms of the errors are shown at the end.
    """
    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL, use_defaults=True)

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    sample_names = dataset.sample_names
    classes_to_check = ['Car', 'Van', 'Pedestrian', 'Cyclist']

    pixel_error_rows = []
    max_error_per_box = []
    total_flip_time = 0.0

    for sample_idx in range(dataset.num_samples):
        sys.stdout.write(
            '\r{} / {}'.format(sample_idx, dataset.num_samples - 1))

        sample_name = sample_names[sample_idx]
        img_idx = int(sample_name)

        # Calibration and filtered ground truth for this frame
        calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
        labels = dataset.kitti_utils.filter_labels(
            obj_utils.read_labels(dataset.label_dir, img_idx),
            classes_to_check)

        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        img_height, img_width = image.shape[0], image.shape[1]
        image_size = [img_width, img_height]

        # Mirror the projection matrix about the vertical image axis
        calib_p2 = calib.p2
        flipped_p2 = np.copy(calib_p2)
        flipped_p2[0, 2] = img_width - flipped_p2[0, 2]
        flipped_p2[0, 3] = -flipped_p2[0, 3]

        for obj in labels:
            # Reference: project the original box, then mirror it in 2D
            reference_box_2d = box_3d_projector.project_to_image_space(
                box_3d_encoder.object_label_to_box_3d(obj),
                calib_p2,
                truncate=True,
                image_size=image_size)

            # Skip boxes outside image
            if reference_box_2d is None:
                continue

            expected_box_2d = flip_box_2d(reference_box_2d, image_size)

            # Time only the 3D flip itself
            tic = time.time()
            flipped_obj = kitti_aug.flip_label_in_3d_only(obj)
            total_flip_time += time.time() - tic

            # Candidate: project the flipped label with the flipped matrix
            actual_box_2d = box_3d_projector.project_to_image_space(
                box_3d_encoder.object_label_to_box_3d(flipped_obj),
                flipped_p2,
                truncate=True,
                image_size=image_size)

            pixel_errors = actual_box_2d - expected_box_2d
            max_pixel_error = np.amax(np.abs(pixel_errors))

            pixel_error_rows.append(pixel_errors)
            max_error_per_box.append(max_pixel_error)

            if max_pixel_error > 5:
                print(' Error > 5px', sample_idx, max_pixel_error)
                print(np.round(expected_box_2d, 3),
                      np.round(actual_box_2d, 3))

    print('Avg flip time:', total_flip_time / dataset.num_samples)

    # Convert to ndarrays
    pixel_error_rows = np.asarray(pixel_error_rows)
    max_error_per_box = np.asarray(max_error_per_box)

    # Print max values
    print(np.amax(max_error_per_box))

    # Plot pixel errors: first coordinate, third coordinate, per-box maximum
    _, axes = plt.subplots(nrows=3, ncols=1)
    first_axis, second_axis, third_axis = axes.flatten()
    first_axis.hist(pixel_error_rows[:, 0], 50,
                    histtype='bar', facecolor='green')
    second_axis.hist(pixel_error_rows[:, 2], 50,
                     histtype='bar', facecolor='green')
    third_axis.hist(max_error_per_box, 50,
                    histtype='bar', facecolor='green')

    plt.show()
def main():
    """Draw p1 proposals and ammf predictions in 3D and 2D in image space.

    Given certain thresholds for proposals and predictions, it selects and
    draws the bounding boxes on the image sample. It goes through the
    entire proposal and prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section. The prediction score and IoU with
    ground truth can be toggled on or off as well, shown as (score, IoU)
    above the detection.

    Samples without a proposal file (when proposals are drawn) or without
    a prediction file are skipped.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    p1_score_threshold = 0.8
    ammf_score_threshold = 0.1

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    # checkpoint_name = 'ammf_cars_example'
    # checkpoint_name = 'pyramid_cars_with_aug_example'
    checkpoint_name = 'people'

    # Drawing Toggles
    draw_proposals_separate = True
    draw_overlaid = True
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = True
    draw_orientations_on_pred = True

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = ammf.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # The checkpoint step is fixed here instead of being discovered by
    # listing the available step directories.
    if global_step is None:
        global_step = '120000'

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, p1_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, ammf_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step, ammf_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1,
                             dataset.num_samples,
                             avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            # ndmin=2 keeps a single-row file two-dimensional so that the
            # column slicing below does not fail.
            proposals_and_scores = np.loadtxt(proposals_file_path, ndmin=2)

            if proposals_and_scores.size == 0:
                # Empty file: there are no columns to slice
                proposal_boxes_3d = np.zeros((0, 7))
                proposal_scores = np.zeros(0)
            else:
                proposal_boxes_3d = proposals_and_scores[:, 0:7]
                proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > p1_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files (see ndmin note above)
            predictions_and_scores = np.loadtxt(predictions_file_path,
                                                ndmin=2)

            if predictions_and_scores.size == 0:
                # Empty file: there are no columns to slice
                prediction_boxes_3d = np.zeros((0, 7))
                prediction_scores = np.zeros(0)
                prediction_class_indices = np.zeros(0)
            else:
                prediction_boxes_3d = predictions_and_scores[:, 0:7]
                prediction_scores = predictions_and_scores[:, 7]
                prediction_class_indices = predictions_and_scores[:, 8]

            # Apply score mask (a no-op on empty arrays)
            ammf_score_mask = prediction_scores >= ammf_score_threshold
            prediction_boxes_3d = prediction_boxes_3d[ammf_score_mask]
            prediction_scores = prediction_scores[ammf_score_mask]
            prediction_class_indices = \
                prediction_class_indices[ammf_score_mask]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Keep only the ground truth objects of the requested classes
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        # NOTE(review): the result of this call is not used below - confirm
        # whether the call is still needed.
        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        image_path = dataset.get_rgb_image_path(sample_name)
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                    img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs,
                           calib_p2,
                           num_of_proposals,
                           proposal_objs,
                           proposal_boxes,
                           prop_2d_axes,
                           prop_3d_axes,
                           draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                # In overlaid mode the figure is reused for the predictions
                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for box_3d in prediction_boxes_3d[:, 0:7]:
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    image_filter.append(img_box is not None)
                    if img_box is not None:
                        final_boxes_2d.append(img_box)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                     for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                # Nothing to draw for this sample. The output directory for
                # prediction images only exists when separate prediction
                # images are enabled, so only save the empty image then.
                if save_empty_images and draw_predictions_separate:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)

                # In overlaid mode the proposal figure was left open for
                # the overlay; close it so skipped samples do not pile up
                # open figures.
                if draw_overlaid:
                    plt.close(prop_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 prop_2d_axes,
                                 prop_3d_axes,
                                 draw_score,
                                 draw_iou,
                                 gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs,
                                     calib_p2,
                                     num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices,
                                     final_boxes_2d,
                                     pred_2d_axes,
                                     pred_3d_axes,
                                     draw_score,
                                     draw_iou,
                                     gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs,
                                        calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices,
                                        final_boxes_2d,
                                        pred_3d_axes,
                                        draw_score,
                                        draw_iou,
                                        gt_classes,
                                        draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
def main():
    """Converts a set of network predictions into text files required for
    KITTI evaluation.

    For every checkpoint step at or above ``min_step``, each sample's
    prediction file is filtered by score, projected into the image, and
    written as one KITTI-format text file per sample (2D and/or 3D,
    depending on the options). Samples with no usable predictions get an
    empty file.
    """

    ##############################
    # Options
    ##############################
    checkpoint_name = 'ammf_cars_example'

    data_split = 'val'

    global_steps = None
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.1

    save_2d = False  # Save 2D predictions
    save_3d = True   # Save 2D and 3D predictions together
    save_alphas = True  # Save alphas (observation angles)

    # Checkpoints below this are skipped
    min_step = 20000

    ##############################
    # End of Options
    ##############################

    # Parse experiment config
    pipeline_config_file = \
        ammf.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    # Overwrite defaults
    dataset_config = config_builder_util.proto_to_obj(dataset_config)
    dataset_config.data_split = data_split
    dataset_config.aug_list = []

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Get available prediction folders
    predictions_root_dir = ammf.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    print('Converting detections from', final_predictions_root_dir)

    if not global_steps:
        global_steps = os.listdir(final_predictions_root_dir)
        global_steps.sort(key=int)
        print('Checkpoints found ', global_steps)

    for global_step in global_steps:

        # Skip checkpoints below the minimum step
        if int(global_step) < min_step:
            continue

        final_predictions_dir = final_predictions_root_dir + \
            '/' + str(global_step)

        # 2D and 3D prediction directories
        kitti_predictions_2d_dir = predictions_root_dir + \
            '/kitti_predictions_2d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'
        kitti_predictions_3d_dir = predictions_root_dir + \
            '/kitti_predictions_3d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'

        if save_2d and not os.path.exists(kitti_predictions_2d_dir):
            os.makedirs(kitti_predictions_2d_dir)
        if save_3d and not os.path.exists(kitti_predictions_3d_dir):
            os.makedirs(kitti_predictions_3d_dir)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        print('\nGlobal step:', global_step)
        print('Converting detections from:', final_predictions_dir)

        if save_2d:
            print('2D Detections saved to:', kitti_predictions_2d_dir)
        if save_3d:
            print('3D Detections saved to:', kitti_predictions_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # Output files that are enabled; each of them is written empty
            # whenever a sample ends up without predictions.
            enabled_output_paths = []
            if save_2d:
                enabled_output_paths.append(kitti_predictions_2d_file_path)
            if save_3d:
                enabled_output_paths.append(kitti_predictions_3d_file_path)

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                for output_path in enabled_output_paths:
                    np.savetxt(output_path, [])
                continue

            # ndmin=2 keeps a file with a single prediction
            # two-dimensional, so the column indexing below does not fail.
            all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

            # An empty file has no score column to filter on
            if all_predictions.size > 0:
                score_filter = all_predictions[:, 7] >= score_threshold
                all_predictions = all_predictions[score_filter]

            # If no predictions, skip to next file
            if all_predictions.size == 0:
                for output_path in enabled_output_paths:
                    np.savetxt(output_path, [])
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Load image for truncation
            image = Image.open(dataset.get_rgb_image_path(sample_name))

            stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                           img_idx).p2

            boxes = []
            image_filter = []
            for box_3d in all_predictions[:, 0:7]:
                img_box = box_3d_projector.project_to_image_space(
                    box_3d, stereo_calib_p2,
                    truncate=True, image_size=image.size)

                # Skip invalid boxes (outside image space)
                image_filter.append(img_box is not None)
                if img_box is not None:
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                for output_path in enabled_output_paths:
                    np.savetxt(output_path, [])
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of
            # zeros (N, 16) is allocated but only values [3:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Get object types
            all_pred_classes = all_predictions[:, 8].astype(np.int32)
            obj_types = [dataset.classes[class_idx]
                         for class_idx in all_pred_classes]

            # Truncation and Occlusion are always empty (see below)

            # Alpha
            if not save_alphas:
                kitti_predictions[:, 3] = -10
            else:
                alphas = all_predictions[:, 6] - \
                    np.arctan2(all_predictions[:, 0], all_predictions[:, 2])
                kitti_predictions[:, 3] = alphas

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # Prediction columns 3, 4, 5 are written in reverse order
            # (5, 4, 3) into output columns 8, 9, 10
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                         dtype=np.int32)

            # Save to text files
            if save_2d:
                # Placeholders for the three column groups that follow the
                # 2D box in the 2D-only output, in output order
                kitti_empty_2 = -1 * np.ones((len(kitti_predictions), 3),
                                             dtype=np.int32)
                kitti_empty_3 = -1000 * np.ones((len(kitti_predictions), 3),
                                                dtype=np.int32)
                # Empty 3D (ry)
                kitti_empty_4 = -10 * np.ones((len(kitti_predictions), 1),
                                              dtype=np.int32)

                # Stack 2D predictions text
                kitti_text_2d = np.column_stack([obj_types,
                                                 kitti_empty_1,
                                                 kitti_predictions[:, 3:8],
                                                 kitti_empty_2,
                                                 kitti_empty_3,
                                                 kitti_empty_4,
                                                 kitti_predictions[:, 15]])
                np.savetxt(kitti_predictions_2d_file_path, kitti_text_2d,
                           newline='\r\n', fmt='%s')
            if save_3d:
                # Stack 3D predictions text
                kitti_text_3d = np.column_stack([obj_types,
                                                 kitti_empty_1,
                                                 kitti_predictions[:, 3:16]])
                np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                           newline='\r\n', fmt='%s')

        print('\nNum valid:', num_valid_samples)
        print('Num samples:', num_samples)
def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # NOTE(review): the returned clusters are not used below (fake_clusters
    # is used instead) - confirm whether this call is still needed.
    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # NOTE(review): anchors are generated over
    # dataset.kitti_utils.area_extents, while the BEV projection below
    # uses this local array - confirm both describe the same area.
    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    # Boxes are used as [x1, y1, x2, y2] when building the rectangles
    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    image_shape = np.array(Image.open(image_path)).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Draw the 3D boxes with the same calibration that produced the 2D
    # boxes; frame 0's calibration was used here before, although the image
    # shown is img_idx.
    p = stereo_calib_p2

    # Overlay boxes on images
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]

        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, p)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        # Redraw periodically rather than after every anchor
        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
def load_samples(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    If a sample has no anchors info while training (and
    ``train_on_all_samples`` is off), the method returns immediately with
    a single dict holding only the sample name and the empty anchors
    info; samples loaded before it are discarded.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts
    """
    # NOTE(review): machine-specific absolute path; the number of files in
    # it decides whether segmentation images are loaded for a sample.
    seg_dir = \
        '/media/bangquanxie/4FCF996C7FA0ED8D/Kitti/object/training/image_seg_2'
    # Counted lazily, once per call instead of once per sample
    seg_file_count = None

    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None

            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        print(cv_bgr_image.shape)
        print("cv_bgr_image.shape")
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Segmentation inputs default to 0 when no seg image is loaded
        seg_input = 0
        label_seg_input = 0
        if seg_file_count is None:
            seg_file_count = len(os.listdir(seg_dir))
        if int(sample_name) < seg_file_count:
            # Both images are read with plt.imread and have their last
            # axis reversed, as was done for the camera image above.
            cv_bgr_seg = plt.imread(self.get_rgb_seg_path(sample_name))
            print(cv_bgr_seg.shape)
            print("cv_bgr_seg.shape")
            seg_input = cv_bgr_seg[..., ::-1]

            cv_bgr_label_seg = plt.imread(
                self.get_rgb_label_seg_path(sample_name))
            # Report the label image that was just loaded (the seg image
            # shape was printed a second time here before)
            print(cv_bgr_label_seg.shape)
            print("cv_bgr_label_seg.shape")
            label_seg_input = cv_bgr_label_seg[..., ::-1]

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        # NOTE(review): seg_input / label_seg_input are not flipped with
        # the image - confirm whether that is intended.
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # Without labels there is nothing to flip
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        # Get classes and box_3d
        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,

            constants.KEY_SEG_INPUT: seg_input,
            constants.KEY_LABEL_SEG: label_seg_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def save_predictions_in_kitti_format(model,
                                     checkpoint_name,
                                     data_split,
                                     score_threshold,
                                     global_step):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.

    One text file per dataset sample is written below
    ``<root>/data/outputs/<checkpoint_name>/predictions/kitti_native_eval/
    <score_threshold>/<global_step>/data``. Samples without a prediction
    file, or without any prediction that passes the score threshold and
    projects into the image, get an empty file.

    Args:
        model: object whose ``dataset`` attribute provides the sample
            names, classes, image paths and calibration directory
        checkpoint_name: name of the checkpoint's output folder
        data_split: not used here; the split is read from
            ``model.dataset.data_split``
        score_threshold: minimum score for a prediction to be kept
            (rounded to 3 decimals)
        global_step: checkpoint step whose predictions are converted
    """

    dataset = model.dataset
    # Round this because protobuf encodes default values as full decimal
    score_threshold = round(score_threshold, 3)

    # Get available prediction folders
    predictions_root_dir = ammf.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    final_predictions_dir = final_predictions_root_dir + \
        '/' + str(global_step)

    # 3D prediction directories
    kitti_predictions_3d_dir = predictions_root_dir + \
        '/kitti_native_eval/' + \
        str(score_threshold) + '/' + \
        str(global_step) + '/data'

    if not os.path.exists(kitti_predictions_3d_dir):
        os.makedirs(kitti_predictions_3d_dir)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', final_predictions_dir)

    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(sample_idx + 1,
                                                       num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]

        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
            '/' + prediction_file

        predictions_file_path = final_predictions_dir + \
            '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # ndmin=2 keeps a file with a single prediction two-dimensional,
        # so the column indexing below does not fail.
        all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

        # An empty file has no score column to filter on
        if all_predictions.size > 0:
            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = all_predictions[score_filter]

        # If no predictions, skip to next file
        if all_predictions.size == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # Project to image space
        sample_name = prediction_file.split('.')[0]
        img_idx = int(sample_name)

        # Load image for truncation
        image = Image.open(dataset.get_rgb_image_path(sample_name))

        stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       img_idx).p2

        boxes = []
        image_filter = []
        for box_3d in all_predictions[:, 0:7]:
            img_box = box_3d_projector.project_to_image_space(
                box_3d, stereo_calib_p2,
                truncate=True, image_size=image.size)

            # Skip invalid boxes (outside image space)
            image_filter.append(img_box is not None)
            if img_box is not None:
                boxes.append(img_box)

        boxes = np.asarray(boxes)
        all_predictions = all_predictions[image_filter]

        # If no predictions, skip to next file
        if len(boxes) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [3:16] are used
        kitti_predictions = np.zeros([len(boxes), 16])

        # Get object types
        all_pred_classes = all_predictions[:, 8].astype(np.int32)
        obj_types = [
            dataset.classes[class_idx] for class_idx in all_pred_classes
        ]

        # Truncation and Occlusion are always empty (see below)

        # Alpha (Not computed)
        kitti_predictions[:, 3] = -10

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes[:, 0:4]

        # 3D predictions
        # Prediction columns 3, 4, 5 are written in reverse order (5, 4, 3)
        # into output columns 8, 9, 10
        kitti_predictions[:, 8] = all_predictions[:, 5]
        kitti_predictions[:, 9] = all_predictions[:, 4]
        kitti_predictions[:, 10] = all_predictions[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones(
            (len(kitti_predictions), 2), dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack(
            [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                   newline='\r\n', fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)