def test_data_splits(self):
    """Check split-name validation and the sample count of each split."""
    invalid_config = DatasetBuilder.copy_config(
        DatasetBuilder.KITTI_UNITTEST)

    # Unknown names and long-form spellings must be rejected; the accepted
    # spellings are "train", "val" and "test".
    for rejected_split in ("bad", "training", "validation", "testing"):
        invalid_config.data_split = rejected_split
        with self.assertRaises(ValueError):
            KittiDataset(invalid_config)

    # (split name, expected number of samples in the fake dataset)
    expected_sample_counts = (
        ('train', 7),
        ('val', 6),
        ('trainval', 13),
        ('test', 10),
    )
    for split_name, expected_count in expected_sample_counts:
        split_dataset = self.get_fake_dataset(split_name,
                                              self.fake_kitti_dir)
        self.assertEqual(split_dataset.num_samples, expected_count)
def main():
    """Generate label clusters and mini batches for the unit-test dataset."""
    config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_UNITTEST)
    config.data_split = "trainval"
    dataset = DatasetBuilder.build_kitti_dataset(config)

    # Label clusters are generated first, then the mini batches
    for generator in (gen_label_clusters, gen_mini_batches):
        generator.main(dataset)
def get_fake_dataset(self, data_split, directory):
    """Build a unit-test KITTI dataset for a given split and directory.

    Args:
        data_split: value written to the config's `data_split` field.
        directory: value written to the config's `dataset_dir` field.

    Returns:
        The dataset returned by `DatasetBuilder.build_kitti_dataset`.
    """
    config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_UNITTEST)

    # Override the copied unit-test defaults
    config.data_split, config.dataset_dir = data_split, directory

    return DatasetBuilder.build_kitti_dataset(config)
def test_project_to_image_space_tensors(self):
    """Check that the TensorFlow anchor projection matches the NumPy one.

    Two anchors are projected into image space with
    `anchor_projector.project_to_image_space` and with
    `anchor_projector.tf_project_to_image_space`; both the corner outputs
    and the normalized corner outputs must agree to within 1e-4.
    """
    anchors = np.asarray([[0, 0, 3, 2, 0, 6],
                          [3, 0, 3, 2, 0, 2]],
                         dtype=np.float64)
    img_idx = int('000217')
    img_shape = [375, 1242]

    dataset_config = DatasetBuilder.copy_config(
        DatasetBuilder.KITTI_UNITTEST)
    dataset_config.data_split = 'train'
    dataset_config.dataset_dir = tests.test_path() + \
        "/datasets/Kitti/object"

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    # Project the 3D points in numpy space
    img_corners, img_corners_norm = anchor_projector.project_to_image_space(
        anchors, stereo_calib_p2, img_shape)

    # Convert the required params to tensors
    tf_stereo_calib_p2 = tf.convert_to_tensor(stereo_calib_p2,
                                              dtype=tf.float32)
    tf_anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
    tf_img_shape = tf.convert_to_tensor(img_shape, dtype=tf.float32)

    # Project the 3D points in tensor space
    img_corners_tensor, img_corners_norm_tensor = \
        anchor_projector.tf_project_to_image_space(tf_anchors,
                                                   tf_stereo_calib_p2,
                                                   tf_img_shape)

    # Using the session itself as the context manager closes it on exit;
    # `sess.as_default()` alone leaves the session (and its resources) open.
    with tf.Session() as sess:
        img_corners_out, img_corners_norm_out = sess.run(
            [img_corners_tensor, img_corners_norm_tensor])

    np.testing.assert_allclose(img_corners,
                               img_corners_out,
                               atol=1e-04,
                               err_msg='Incorrect corner projection')
    np.testing.assert_allclose(
        img_corners_norm,
        img_corners_norm_out,
        atol=1e-04,
        err_msg='Incorrect normalized corner projection')
def main():
    """This demo visualizes box 8C format predicted by MLOD, before
    getting converted to Box 3D.

    Predictions for one sample are read from the checkpoint's
    `final_boxes_8c_and_scores` output directory, filtered by score and
    shown in a VTK window together with the ground truth boxes, the point
    cloud and a voxel grid.

    Keys:
        F1: Toggle predictions
        F2: Toggle easy ground truth objects (Green)
        F3: Toggle medium ground truth objects (Orange)
        F4: Toggle hard ground truth objects (Red)
        F5: Toggle all ground truth objects (default off)
        F6: Toggle 3D voxel grid
        F7: Toggle point cloud
    """
    ##############################
    # Options
    ##############################
    mlod_score_threshold = 0.1
    show_orientations = True

    checkpoint_name = 'mlod_exp_8c'

    # Use None to pick the latest available step
    global_step = None

    # Use None to pick a random sample (see the list below)
    sample_name = None

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL_HALF)
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    ##############################
    # Setup Paths
    ##############################
    # # # Cars # # #
    # sample_name = '000050'
    # sample_name = '000104'
    # sample_name = '000169'
    # sample_name = '000191'
    # sample_name = '000360'
    # sample_name = '001783'
    # sample_name = '001820'
    # sample_name = '006338'

    # # # People # # #
    # val_half split
    # sample_name = '000001'
    sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        # NOTE(review): assumes `sample_list` entries are name strings that
        # int() accepts - confirm against the dataset class.
        sample_name = dataset.sample_list[sample_idx]

    img_idx = int(sample_name)

    # Text files directory
    predictions_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' + \
        '/final_boxes_8c_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(predictions_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    ##############################
    # predictions
    ##############################
    # Load predictions from files. `ndmin=2` keeps a file with a single
    # prediction as one row instead of a 1-D array, which would break the
    # column slicing below.
    predictions_and_scores = np.loadtxt(
        predictions_and_scores_dir + "/{}/{}.txt".format(global_step,
                                                         sample_name),
        ndmin=2)

    # Columns 0-23 hold the box values, column 24 the score
    predictions_boxes_8c = predictions_and_scores[:, 0:24]
    prediction_scores = predictions_and_scores[:, 24]

    score_mask = prediction_scores >= mlod_score_threshold
    predictions_boxes_8c = predictions_boxes_8c[score_mask]

    # One VtkBox8c per prediction, each fed a 3 x 8 slice of the box values
    all_vtk_box_corners = []
    predictions_boxes_8c = np.reshape(predictions_boxes_8c, [-1, 3, 8])
    for box_8c in predictions_boxes_8c:
        vtk_box_corners = VtkBox8c()
        vtk_box_corners.set_objects(box_8c)
        all_vtk_box_corners.append(vtk_box_corners)

    ##############################
    # Ground Truth
    ##############################
    if dataset.has_labels:
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = \
            demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
    else:
        # Separate lists so the four names never alias the same object
        easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs = \
            [], [], [], []

    ##############################
    # Point Cloud
    ##############################
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)

    points, point_colours = demo_utils.get_filtered_pc_and_colours(
        dataset, image, img_idx)

    # Voxelize the point cloud for visualization
    voxel_grid = VoxelGrid()
    voxel_grid.voxelize(points, voxel_size=0.1, create_leaf_layout=False)

    ##############################
    # Visualization
    ##############################
    # Create VtkVoxelGrid
    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(voxel_grid)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for ground truth
    vtk_easy_gt_boxes, vtk_medium_gt_boxes, \
        vtk_hard_gt_boxes, vtk_all_gt_boxes = \
        demo_utils.create_gt_vtk_boxes(easy_gt_objs,
                                       medium_gt_objs,
                                       hard_gt_objs,
                                       all_gt_objs,
                                       show_orientations)

    # Group all prediction boxes in one assembly so that they can be
    # toggled together.
    vtk_box_actors = vtk.vtkAssembly()
    for vtk_box_corners in all_vtk_box_corners:
        # Adding labels, slows down rendering
        # vtk_renderer.AddActor(vtk_box_corners.vtk_text_labels.vtk_actor)
        vtk_box_actors.AddPart(vtk_box_corners.vtk_actor)

    # Create the renderer; each actor is registered exactly once (the voxel
    # grid and point cloud used to be added three times).
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_box_actors)
    vtk_renderer.AddActor(axes)

    # Set initial properties for some actors
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(2)
    vtk_voxel_grid.vtk_actor.SetVisibility(0)
    vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Predictions: Step {}, Sample {}, Min Score {}".format(
            global_step,
            sample_name,
            mlod_score_threshold,
        ))
    vtk_render_window.SetSize(900, 600)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events.
    # The actor order matches the F1-F7 key list in the docstring.
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_box_actors,
            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,
            vtk_voxel_grid.vtk_actor,
            vtk_point_cloud.vtk_actor
        ]))

    vtk_render_window_interactor.Start()
def main():
    """Displays the bird's eye view maps for a KITTI sample.

    The BEV generator configured in the options section is merged into the
    KITTI_VAL dataset config, the BEV maps of one sample are created from
    its point cloud and ground plane, and every height map plus the density
    map is shown in its own OpenCV window (optionally with the ground truth
    boxes drawn on top).

    Raises:
        ValueError: if `bev_generator` is not a supported generator name.
        FileNotFoundError: if the sample image cannot be read.
    """
    ##############################
    # Options
    ##############################
    # one of ['height_priors', 'slices']
    bev_generator = 'slices'

    height_priors_config = \
        """
        height_priors {
            ground_filter_offset: 0.2
            offset_filter_distance: 2.0
            std_dev_multiplier: 2.0
        }
        """

    slices_config = \
        """
        slices {
            height_lo: -0.2
            height_hi: 2.3
            num_slices: 5
        }
        """

    # Use None for a random image
    img_idx = None
    # img_idx = 142
    # img_idx = 191

    show_ground_truth = True  # Whether to overlay ground_truth boxes

    point_cloud_source = 'lidar'
    ##############################
    # End of Options
    ##############################

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)
    dataset_config = DatasetBuilder.merge_defaults(dataset_config)

    # Overwrite bev_generator
    if bev_generator == 'height_priors':
        text_format.Merge(height_priors_config,
                          dataset_config.kitti_utils_config.bev_generator)
    elif bev_generator == 'slices':
        text_format.Merge(slices_config,
                          dataset_config.kitti_utils_config.bev_generator)
    else:
        raise ValueError('Invalid bev_generator')

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    if img_idx is None:
        # Draw the random sample from the split's own sample names. A random
        # number in [0, num_samples) is only a position in the split, not an
        # image id, so formatting it directly could select an image that is
        # not part of the split.
        sample_name = dataset.sample_names[
            random.randrange(dataset.num_samples)]
        img_idx = int(sample_name)
    else:
        sample_name = "{:06}".format(img_idx)
    print('=== Showing BEV maps for image: {}.png ==='.format(sample_name))

    # Load image; cv2.imread signals failure by returning None
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(
            'Could not read image: {}'.format(image_path))
    image_shape = image.shape[0:2]

    kitti_utils = dataset.kitti_utils
    point_cloud = kitti_utils.get_point_cloud(point_cloud_source,
                                              img_idx,
                                              image_shape)
    ground_plane = kitti_utils.get_ground_plane(sample_name)
    bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

    height_maps = np.array(bev_images.get("height_maps"))
    density_map = np.array(bev_images.get("density_map"))

    # Stays None when ground truth is not shown
    box_points_norm = None
    if show_ground_truth:
        # Get projected boxes
        obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx)

        label_boxes = np.array(
            [box_3d_encoder.object_label_to_box_3d(label)
             for label in obj_labels])

        _, box_points_norm = box_3d_projector.project_to_bev(
            label_boxes, [[-40, 40], [0, 70]])

    # Window layout, in screen pixels
    rgb_img_size = (np.array((1242, 375)) * 0.75).astype(np.int16)
    img_x_start = 60
    img_y_start = 330

    img_x = img_x_start
    img_y = img_y_start
    img_w = 400
    img_h = 350
    img_titlebar_h = 20

    # Show images
    vis_utils.cv2_show_image("Image", image,
                             size_wh=rgb_img_size, location_xy=(img_x, 0))

    # Height maps
    for map_idx, height_map in enumerate(height_maps):
        height_map = draw_boxes(height_map, box_points_norm)
        vis_utils.cv2_show_image(
            "Height Map {}".format(map_idx), height_map,
            size_wh=(img_w, img_h), location_xy=(img_x, img_y))

        img_x += img_w
        # Wrap around to a new row once the next window would pass x = 1920
        if (img_x + img_w) > 1920:
            img_x = img_x_start
            img_y += img_h + img_titlebar_h

    # Density map
    density_map = draw_boxes(density_map, box_points_norm)
    vis_utils.cv2_show_image(
        "Density Map", density_map,
        size_wh=(img_w, img_h), location_xy=(img_x, img_y))

    cv2.waitKey()
def setUpClass(cls):
    """Build the unit-test KITTI dataset once and store it on the class.

    Sets `cls.dataset` to the built dataset and `cls.label_dir` to that
    dataset's `label_dir`.
    """
    unittest_config = DatasetBuilder.copy_config(
        DatasetBuilder.KITTI_UNITTEST)
    unittest_dataset = DatasetBuilder.build_kitti_dataset(unittest_config)

    cls.dataset = unittest_dataset
    cls.label_dir = unittest_dataset.label_dir
def _load_rows(file_path, num_columns):
    """Load a whitespace-separated text file as a 2D array of rows.

    Args:
        file_path: path of the text file to read.
        num_columns: number of columns given to the result when the file
            contains no values.

    Returns:
        A 2D array with one row per line of the file. A file with a single
        line yields one row, and a file with no values yields an array of
        shape (0, num_columns), so that column slicing always works.
    """
    rows = np.loadtxt(file_path, ndmin=2)
    if rows.size == 0:
        return np.zeros((0, num_columns))
    return rows


def main():
    """This demo shows RPN proposals and MLOD predictions in 3D
    and 2D in image space.

    Given certain thresholds for proposals and predictions, it selects and
    draws the bounding boxes on the image sample. It goes through the entire
    proposal and prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    mlod_score_threshold = 0.1

    # Flag for projecting the 3D boxes to image space
    # in tensor format (for testing purposes)
    test_img_tensor_projection = False

    gt_classes = ['Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = 44000
    checkpoint_name = 'mlod_fpn_people'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = False

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = False
    iou_3d = False
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    # `exist_ok=True` replaces the check-then-create sequence, which could
    # fail if the directory appeared between the two calls.
    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)
        os.makedirs(prop_out_dir, exist_ok=True)
        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, mlod_score_threshold)
        os.makedirs(overlaid_out_dir, exist_ok=True)
        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step,
            mlod_score_threshold)
        os.makedirs(pred_out_dir, exist_ok=True)
        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'. format(
                             sample_idx + 1,
                             dataset.num_samples,
                             avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            # Columns 0-6: 3D box, column 7: score
            proposals_and_scores = _load_rows(proposals_file_path, 8)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            # Columns 0-6: 3D box, column 7: score, column 8: class index
            predictions_and_scores = _load_rows(predictions_file_path, 9)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # Apply score mask. The configured threshold is used here so
            # that it matches the threshold in the output directory name
            # (the mask used to be hard-coded to 0.1).
            mlod_show_mask = prediction_scores >= mlod_score_threshold
            prediction_boxes_3d = prediction_boxes_3d[mlod_show_mask]
            prediction_scores = prediction_scores[mlod_show_mask]
            prediction_class_indices = \
                prediction_class_indices[mlod_show_mask]

        ##############################
        # Ground Truth
        ##############################
        # Get ground truth labels
        # NOTE(review): the flag is forced off on the dataset object here, so
        # ground truth is never read - confirm this override is intentional.
        dataset.has_labels = False
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        # The result is not used below; the call is kept as in the original.
        obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        # Only the size is needed; the context manager closes the file
        # handle instead of leaving one open per processed sample.
        image_path = dataset.get_rgb_image_path(sample_name)
        with Image.open(image_path) as image:
            image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                    img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            if test_img_tensor_projection:
                proposal_boxes = demo_utils.tf_project_to_image_space(
                    proposals_as_anchors, calib_p2, image_size, img_idx)
            else:
                proposal_boxes, _ = anchor_projector.project_to_image_space(
                    proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs,
                           calib_p2,
                           num_of_proposals,
                           proposal_objs,
                           proposal_boxes,
                           prop_2d_axes,
                           prop_3d_axes,
                           draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                # The figure is still needed when predictions are overlaid
                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) == 0:
                # No prediction passed the score mask for this sample.
                # Release the proposal figure that was kept open for the
                # overlay so that figures do not pile up over the loop.
                if draw_overlaid:
                    plt.close(prop_fig)

                # `pred_out_dir` only exists when separate prediction
                # images are requested.
                if save_empty_images and draw_predictions_separate:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            # Project the 3D box predictions to image space; boxes for
            # which the projection returns None are dropped.
            projected_boxes = [
                box_3d_projector.project_to_image_space(
                    box_3d, calib_p2,
                    truncate=True, image_size=image_size,
                    discard_before_truncation=False)
                for box_3d in prediction_boxes_3d]
            image_filter = np.asarray(
                [img_box is not None for img_box in projected_boxes],
                dtype=bool)

            final_boxes_2d = np.asarray(
                [img_box for img_box in projected_boxes
                 if img_box is not None])
            final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
            final_scores = prediction_scores[image_filter]
            final_class_indices = prediction_class_indices[image_filter]

            num_of_predictions = final_boxes_2d.shape[0]

            # Convert to objs
            final_prediction_objs = \
                [box_3d_encoder.box_3d_to_object_label(
                    prediction, obj_type='Prediction')
                    for prediction in final_prediction_boxes_3d]
            for (obj, score) in zip(final_prediction_objs, final_scores):
                obj.score = score

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 prop_2d_axes,
                                 prop_3d_axes,
                                 draw_score,
                                 draw_iou,
                                 gt_classes,
                                 draw_orientations_on_pred,
                                 iou_3d)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs,
                                     calib_p2,
                                     num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices,
                                     final_boxes_2d,
                                     pred_2d_axes,
                                     pred_3d_axes,
                                     draw_score,
                                     draw_iou,
                                     gt_classes,
                                     draw_orientations_on_pred,
                                     iou_3d)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs,
                                        calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices,
                                        final_boxes_2d,
                                        pred_3d_axes,
                                        draw_score,
                                        draw_iou,
                                        gt_classes,
                                        draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
def train_and_eval(model_config, train_config, eval_config, dataset_config):
    """Alternate between training and evaluating a model.

    Training runs in chunks of `eval_config.eval_interval` iterations; after
    each chunk the latest checkpoints are evaluated. This repeats until
    `train_config.max_iterations` (as given on entry) is passed. If
    checkpoints have already been evaluated, the loop continues from the
    last evaluated one.

    Note that `train_config.max_iterations` is overwritten with the current
    target iteration on every pass.

    Args:
        model_config: model configuration; `model_name` must be
            'mlod_model' or 'rpn_model'.
        train_config: training configuration; `max_iterations` and
            `checkpoint_interval` are read.
        eval_config: evaluation configuration; `eval_mode` and
            `eval_interval` are read.
        dataset_config: dataset configuration, copied once for the training
            dataset and once for the evaluation dataset.

    Raises:
        ValueError: if the evaluation mode is 'train', if the evaluation
            interval is smaller than or not divisible by the checkpoint
            interval, or if the model name is not supported.
    """
    # Validate the cheap configuration values first, so that a bad
    # configuration fails before any dataset is built.
    model_name = model_config.model_name
    train_val_test = 'train'
    eval_mode = eval_config.eval_mode
    if eval_mode == 'train':
        raise ValueError('Evaluation mode can only be set to `val` or `test`.')

    # keep a copy as this will be overwritten inside
    # the training loop below
    max_train_iter = train_config.max_iterations
    checkpoint_interval = train_config.checkpoint_interval
    eval_interval = eval_config.eval_interval

    if eval_interval < checkpoint_interval or \
            (eval_interval % checkpoint_interval) != 0:
        raise ValueError(
            'Evaluation interval (given {}) must be at least, and '
            'divisible by, the checkpoint interval (given {}).'.format(
                eval_interval, checkpoint_interval))

    if model_name not in ('mlod_model', 'rpn_model'):
        raise ValueError('Invalid model name {}'.format(model_name))

    # Dataset Configuration
    dataset_config_train = DatasetBuilder.copy_config(dataset_config)
    dataset_config_eval = DatasetBuilder.copy_config(dataset_config)

    dataset_train = DatasetBuilder.build_kitti_dataset(dataset_config_train,
                                                       use_defaults=False)
    dataset_eval = DatasetBuilder.build_kitti_dataset(dataset_config_eval,
                                                      use_defaults=False)

    def build_model(mode, dataset):
        """Create the configured model for the given mode and dataset."""
        model_class = MlodModel if model_name == 'mlod_model' else RpnModel
        return model_class(model_config,
                           train_val_test=mode,
                           dataset=dataset)

    # Use the evaluation losses file to continue from the latest
    # checkpoint
    already_evaluated_ckpts = evaluator.get_evaluated_ckpts(
        model_config, model_name)

    if len(already_evaluated_ckpts) != 0:
        current_train_iter = already_evaluated_ckpts[-1]
    else:
        current_train_iter = eval_interval

    # while training is not finished
    while current_train_iter <= max_train_iter:
        # Train (each phase gets its own fresh graph)
        with tf.Graph().as_default():
            model = build_model(train_val_test, dataset_train)

            # overwrite the training epochs
            train_config.max_iterations = current_train_iter
            print('\n*************** Training ****************\n')
            trainer.train(model, train_config)
        current_train_iter += eval_interval

        # Evaluate
        with tf.Graph().as_default():
            model = build_model(eval_mode, dataset_eval)

            print('\n*************** Evaluating *****************\n')
            evaluator.run_latest_checkpoints(model, dataset_config_eval)
    print('\n************ Finished training and evaluating *************\n')
def main():
    """Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors.

    Anchors are generated on a grid from one fixed anchor size, projected
    into bird's eye view and into the image of sample `img_idx`, drawn with
    matplotlib, and finally shown as 3D boxes in a blocking VTK window.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # The clusters themselves are not used below (fake clusters are used
    # instead); the call is kept as in the original.
    label_cluster_utils = LabelClusterUtils(dataset)
    label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # NOTE(review): anchors are generated over
    # `dataset.kitti_utils.area_extents` but projected to BEV with this local
    # extent - confirm that the two agree.
    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    # Boxes are indexed as [x1, y1, x2, y2]
    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    # `img_idx` is an image id (it is passed as such to the visualization
    # and calibration readers), so the image path is built from the
    # zero-padded id rather than from a position in the split's sample list.
    image_path = dataset.get_rgb_image_path("{:06d}".format(img_idx))
    with Image.open(image_path) as image:
        image_shape = np.array(image).shape

    # Stereo calibration matrix of the displayed image; used both for the
    # anchor projection and for drawing the 3D boxes.
    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Overlay boxes on images
    anchor_objects = []
    for anchor_idx, anchor_box_3d in enumerate(anchor_boxes_3d):
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Append to a list for visualization in VTK later
        anchor_objects.append(obj_label)

        # Draw 3D boxes (previously drawn with the calibration of image 0)
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, stereo_calib_p2)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w, box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        # Redraw only every 32 anchors
        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=False)

    # Create VtkGroundPlane for ground plane visualization
    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, area_extents[[0, 2]])

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(anchor_objects, vtk_boxes.COLOUR_SCHEME_KITTI)

    # Create the renderer
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking