def load_samples(self, indices):
    """Load input-output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded.

    Returns:
        A list of data sample dicts, one per entry in `indices`. When
        labels are available, the split is 'train', `train_on_all_samples`
        is off and a sample has no anchors info, loading stops and a
        single-element list holding only that sample's name and its
        (empty) anchors info is returned instead.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name
        img_idx = int(sample_name)

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)
            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir, img_idx)

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            # Placeholder label tensors for unlabeled data
            obj_labels = None
            anchors_info = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, img_idx).p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Check if the run is training and if the train augmentation is set
        if self.train_val_test == 'train' and self.is_train_aug:
            # Apply a randomly chosen image augmentation about half of
            # the time
            is_aug = np.random.uniform(0, 1)
            if is_aug > 0.5:
                # Make a random choice from the list of available aug
                # options; the name is looked up as a kitti_aug function
                random_aug = random.choice(self.augs)
                image_input[:, :, 0:3] = getattr(kitti_aug, random_aug)(
                    image_input[:, :, 0:3])

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None when the dataset has no labels; there is
            # nothing to flip in that case.
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = np.asarray([
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ], dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        # Stack all height maps followed by the density map along the
        # last axis
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def load_samples(self, indices):
    """Load input-output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    In addition to the image / BEV inputs and labels, this variant
    produces the sparse pooling inputs when `self.output_indices` is set.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded.

    Returns:
        A list of data sample dicts, one per entry in `indices`. When
        labels are available, the split is 'train', `train_on_all_samples`
        is off and a sample has no anchors info, loading stops and a
        single-element list holding only that sample's name and its
        (empty) anchors info is returned instead.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name
        img_idx = int(sample_name)

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)
            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir, img_idx)

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            # Placeholder label tensors for unlabeled data
            obj_labels = None
            anchors_info = []
            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, self.planes_dir)

        # Get calibration; the full calibration object is kept because the
        # sparse pooling input generation below takes it as an argument.
        stereo_calib = calib_utils.read_calibration(self.calib_dir, img_idx)
        stereo_calib_p2 = stereo_calib.p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        # WZN: the flipping augmentation flips the image (in camera frame),
        # the point cloud (in Lidar frame), and the calibration matrix
        # (between cam and Lidar), so the correspondence is still true.
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None when the dataset has no labels; there is
            # nothing to flip in that case.
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)
            # NOTE(review): only the local stereo_calib_p2 is replaced by
            # its flipped version; the stereo_calib object handed to the
            # sparse pooling generator below is not updated here. Confirm
            # that the generator does not depend on the flipped P2.

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = np.asarray([
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ], dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane, output_indices=self.output_indices)

        # WZN: with output_indices set, create_bev_maps returns
        # (bev maps, voxel indices, points in voxel) instead of only the
        # bev maps.
        if self.output_indices:
            voxel_indices = bev_images[1]
            pts_in_voxel = bev_images[2]
            bev_images = bev_images[0]

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        # Stack all height maps followed by the density map along the
        # last axis
        bev_input = np.dstack((*height_maps, density_map))

        # WZN: produce input for sparse pooling
        if self.output_indices:
            sparse_pooling_input1 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input.shape[0:2]),
                stride=[1, 1])

            # WZN: Note here avod padded the vgg input by 4, so add it
            bev_input_padded = np.copy(bev_input.shape[0:2])
            bev_input_padded[0] = bev_input_padded[0] + 4
            sparse_pooling_input2 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input_padded),
                stride=[8, 8])

            sparse_pooling_input = [
                sparse_pooling_input1, sparse_pooling_input2
            ]
        else:
            sparse_pooling_input = None

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            # WZN: for sparse pooling
            constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def _calculate_anchors_info(self, all_anchor_boxes_bev, empty_anchor_filter,
                            gt_labels):
    """Calculate the per-anchor information table for one sample.

    Each row describes one non-empty anchor. The column layout is defined
    by `self.mini_batch_utils`:
        col_anchor_indices - index of the anchor in all_anchor_boxes_bev
        col_ious - highest 2D iou with any ground truth box, computed
            axis-aligned ('2d') or rotated ('2d_rotate') depending on
            mini_batch_utils.retinanet_iou_type
        col_offsets_lo:col_offsets_hi - encoded offsets towards the best
            matching ground truth: the BEV box offsets, followed by the
            height offsets and the orientation angle class
        col_class_idx - the anchor's class as an index
            (e.g. 0 or 1, for "Background" or "Car")

    Args:
        all_anchor_boxes_bev: array of anchors in BEV box format
            N x [xc, yc, w, h, angle]
        empty_anchor_filter: boolean mask of which anchors are non empty
        gt_labels: list of Object Label data format containing ground truth
            labels to generate positives/negatives from.

    Returns:
        Array of anchor info with one row per non-empty anchor and
        mini_batch_utils.col_length columns.

    Raises:
        Warning: if gt_labels is empty.
        ValueError: if the configured retinanet iou type is neither
            '2d' nor '2d_rotate'.
    """
    # Check for ground truth objects
    if len(gt_labels) == 0:
        raise Warning("No valid ground truth label to generate anchors.")

    kitti_utils = self._dataset.kitti_utils
    mb_utils = self.mini_batch_utils

    # Filter empty anchors
    anchor_indices = np.where(empty_anchor_filter)[0]
    anchors = all_anchor_boxes_bev[empty_anchor_filter]

    # Convert gt to boxes_3d -> normalized bev boxes -> bev map pixels
    gt_boxes_3d = np.asarray([
        box_3d_encoder.object_label_to_box_3d(gt_obj)
        for gt_obj in gt_labels
    ])
    gt_anchors_norm, _ = box_3d_projector.project_to_bev_box(
        gt_boxes_3d, self._area_extents[[0, 2]])

    bev_map_h, bev_map_w = self._bev_shape
    # (N, 5) * (5,): scale the normalized box columns by the bev map size;
    # the last column (angle) is left untouched.
    gt_anchors = np.multiply(
        gt_anchors_norm,
        np.array([bev_map_w, bev_map_h, bev_map_w, bev_map_h, 1]))

    iou_type = mb_utils.retinanet_iou_type
    if iou_type == '2d_rotate':
        # Rotated iou works directly on the bev boxes
        anchors_for_2d_iou_r = anchors
        gt_boxes_for_2d_iou_r = gt_anchors
    elif iou_type == '2d':
        # Convert anchors and gt boxes to the iou_h format and truncate
        # to integers
        anchors_for_2d_iou_h = box_bev_encoder.box_bev_to_iou_h_format(
            anchors)
        anchors_for_2d_iou_h = anchors_for_2d_iou_h.astype(np.int32)
        gt_boxes_for_2d_iou_h = box_bev_encoder.box_bev_to_iou_h_format(
            gt_anchors)
        gt_boxes_for_2d_iou_h = gt_boxes_for_2d_iou_h.astype(np.int32)
    else:
        raise ValueError(
            'Invalid retinanet iou_type {}'.format(iou_type))

    # Initialize sample and offset lists
    num_anchors = len(anchors)
    all_info = np.zeros((num_anchors, mb_utils.col_length))

    # Update anchor indices
    all_info[:, mb_utils.col_anchor_indices] = anchor_indices

    # Set to True to print the first updated anchor for each gt object
    debug = False

    # For each of the labels, generate samples
    for gt_idx, gt_obj in enumerate(gt_labels):
        gt_box_3d = box_3d_encoder.object_label_to_box_3d(gt_obj)

        # Get the 2D IoU of this gt box with every anchor
        if iou_type == '2d':
            gt_box_for_2d_iou_h = gt_boxes_for_2d_iou_h[gt_idx]
            ious = evaluation.two_d_iou(gt_box_for_2d_iou_h,
                                        anchors_for_2d_iou_h)
        elif iou_type == '2d_rotate':
            gt_box_for_2d_iou_r = gt_boxes_for_2d_iou_r[gt_idx]
            ious = evaluation.two_d_rotate_iou(gt_box_for_2d_iou_r,
                                               anchors_for_2d_iou_r)

        # Only update indices with a higher iou than before
        update_indices = np.greater(ious, all_info[:, mb_utils.col_ious])

        # Get ious to update
        ious_to_update = ious[update_indices]

        # Calculate offsets for the anchors that now match this gt best
        anchors_to_update = anchors[update_indices]

        # Camera facing object's head
        facing_obj_head = gt_obj.ry >= 0
        # This is a view into gt_anchors, so the shift below is applied in
        # place; the row is only read for the iou above, before the shift.
        gt_anchor = gt_anchors[gt_idx]
        # Turns (-pi, pi) to (-pi, 0) for gt_anchor's angle
        if facing_obj_head:
            gt_anchor[-1] -= np.pi

        offsets_boxes = anchor_bev_encoder.anchor_to_offset(
            anchors_to_update, gt_anchor)
        # Decoded back from the offsets; only used by the debug output
        gt_anchor_pred = anchor_bev_encoder.offset_to_anchor(
            anchors_to_update, offsets_boxes)

        # y axis 3d value
        n_anchor = offsets_boxes.shape[0]
        anchor_h = anchor_bev_encoder.get_default_anchor_h(n_anchor, 'np')
        gt_h = [gt_obj.t[1], gt_obj.h]
        offsets_h = anchor_bev_encoder.anchor_to_offset_h(anchor_h, gt_h)

        # One copy of the gt rotation per updated anchor. The builtin int
        # replaces the np.int alias, which was removed from NumPy; adding
        # gt_obj.ry promotes the result exactly as before.
        gt_anchors_angle = np.zeros_like(
            offsets_boxes[:, 0], dtype=int) + gt_obj.ry
        offsets_angle_cls = orientation_encoder.orientation_to_angle_cls(
            gt_anchors_angle)

        offsets = np.hstack(
            [offsets_boxes, offsets_h, offsets_angle_cls[:, np.newaxis]])

        # Convert gt type to index
        class_idx = kitti_utils.class_str_to_index(gt_obj.type)

        # Update anchors info (indices already updated)
        all_info[update_indices, mb_utils.col_ious] = ious_to_update
        all_info[update_indices,
                 mb_utils.col_offsets_lo:mb_utils.col_offsets_hi] = offsets
        all_info[update_indices, mb_utils.col_class_idx] = class_idx

        if debug:
            print(f'gt obj:{gt_box_3d}, gt anchor bev: {gt_anchor}')
            print(f'anchors_to_update: {anchors_to_update[:1]}')
            print(f'update at all_info: \n{all_info[update_indices][:1]}')
            print(f'gt_from_anchor_offsets:\n{gt_anchor_pred[:1]}')

    return all_info
def main():
    """This demo shows RPN proposals and AVOD predictions in the 3D point cloud.

    For every sample name in the option list, the saved proposals and final
    predictions of the selected checkpoint step are loaded, filtered by
    score, rendered with VTK together with the point cloud and ground
    truth boxes, and a screenshot is written to the checkpoint's
    predictions/images_3d folder.

    Keys:
        F1: Toggle proposals
        F2: Toggle predictions
        F3: Toggle 3D voxel grid
        F4: Toggle point cloud

        F5: Toggle easy ground truth objects (Green)
        F6: Toggle medium ground truth objects (Orange)
        F7: Toggle hard ground truth objects (Red)
        F8: Toggle all ground truth objects (default off)

        F9: Toggle ground slice filter (default off)
        F10: Toggle offset slice filter (default off)
    """
    ##############################
    # Options
    ##############################
    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    proposals_line_width = 1.0
    predictions_line_width = 3.0
    show_orientations = True

    point_cloud_source = 'depth'

    # Config file folder, default (<avod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    checkpoint_name = 'pyramid_cars_with_aug_example'

    # Set to None to use the latest checkpoint
    global_step = 83000

    # data_split = 'val_half'
    data_split = 'val'
    # data_split = 'test'

    # Show 3D iou text
    draw_ious_3d = True

    # Samples to visualize; a None entry picks a random sample.
    # name_list = ['0000000003', '0000000009', '0000000016', '0000000233',
    #              '0000000234', '0000000236', '0000000422', '0000000473',
    #              '0000000490', '0000000494', '0000000547', '0000000655',
    #              '0000000679', '0000000690', '0000000692', '0000000781']
    name_list = ['0000000004']

    # Sample names noted for the KITTI splits:
    #
    # # # Cars # # #
    # '000050', '000104', '000169', '000191', '000360', '001783', '001820'
    # val split
    # '000181', '000751', '000843', '000944', '006338'
    #
    # # # People # # #
    # val_half split
    # '000001'  # Hard, 1 far cyc
    # '000005'  # Easy, 1 ped
    # '000122'  # Easy, 1 cyc
    # '000134'  # Hard, lots of people
    # '000167'  # Medium, 1 ped, 2 cycs
    # '000187'  # Medium, 1 ped on left
    # '000381'  # Easy, 1 ped
    # '000398'  # Easy, 1 ped
    # '000401'  # Hard, obscured peds
    # '000407'  # Easy, 1 ped
    # '000448'  # Hard, several far people
    # '000486'  # Hard 2 obscured peds
    # '000509'  # Easy, 1 ped
    # '000718'  # Hard, lots of people
    # '002216'  # Easy, 1 cyc
    # val split
    # '000015', '000048', '000058'
    # '000076'  # Medium, few ped, 1 cyc
    # '000108', '000118', '000145', '000153', '000186', '000195',
    # '000199', '000397', '004425'
    # '004474'  # Hard, many ped, 1 cyc
    # '004657'  # Hard, Few cycl, few ped
    # '006071'
    # '006828'  # Hard, Few cycl, few ped
    # '006908'  # Hard, Few cycl, few ped
    # '007412'
    # '007318'  # Hard, Few cycl, few ped

    ##############################
    # End of Options
    ##############################

    # The test split has no ground truth to compare against
    if data_split == 'test':
        draw_ious_3d = False

    if config_dir is None:
        config_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name

    # Parse experiment config (independent of the sample, so done once)
    pipeline_config_file = \
        config_dir + '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    dataset_config.data_split = data_split

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'
        dataset_config.has_labels = False

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # NOTE(review): the per-sample work below is assumed to belong to the
    # loop body, so that every entry of name_list gets a screenshot.
    for sample_name in name_list:
        # Random sample
        if sample_name is None:
            sample_idx = np.random.randint(0, dataset.num_samples)
            sample_name = dataset.sample_names[sample_idx]

        ##############################
        # Setup Paths
        ##############################
        img_idx = int(sample_name)

        # Text files directory
        proposals_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' + \
            '/proposals_and_scores/' + dataset.data_split

        predictions_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' + \
            '/final_predictions_and_scores/' + dataset.data_split

        # Get checkpoint step
        steps = sorted(os.listdir(proposals_and_scores_dir), key=int)
        print('Available steps: {}'.format(steps))

        # Use latest checkpoint if no index provided
        if global_step is None:
            global_step = steps[-1]

        # Output images directory
        img_out_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name + \
            '/predictions/images_3d/{}/{}/{}'.format(dataset.data_split,
                                                     global_step,
                                                     rpn_score_threshold)
        os.makedirs(img_out_dir, exist_ok=True)

        ##############################
        # Proposals
        ##############################
        # Load proposals from files; ndmin=2 keeps a file with a single
        # proposal two-dimensional so the column slicing below works.
        proposals_and_scores = np.loadtxt(
            proposals_and_scores_dir +
            "/{}/{}.txt".format(global_step, sample_name),
            ndmin=2)

        proposals = proposals_and_scores[:, 0:7]
        proposal_scores = proposals_and_scores[:, 7]

        rpn_score_mask = proposal_scores > rpn_score_threshold

        proposals = proposals[rpn_score_mask]
        proposal_scores = proposal_scores[rpn_score_mask]
        print('Proposals:', len(proposal_scores), proposal_scores)

        proposal_objs = [
            box_3d_encoder.box_3d_to_object_label(proposal,
                                                  obj_type='Proposal')
            for proposal in proposals
        ]

        ##############################
        # Predictions
        ##############################
        # Load predictions from files
        predictions_and_scores = np.loadtxt(
            predictions_and_scores_dir +
            "/{}/{}.txt".format(global_step, sample_name)).reshape(-1, 9)

        prediction_boxes_3d = predictions_and_scores[:, 0:7]
        prediction_scores = predictions_and_scores[:, 7]
        prediction_types = np.asarray(predictions_and_scores[:, 8],
                                      dtype=np.int32)

        # Boxes, scores and types are filtered with the same mask so that
        # they stay aligned row by row.
        avod_score_mask = prediction_scores >= avod_score_threshold
        prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
        prediction_scores = prediction_scores[avod_score_mask]
        prediction_types = prediction_types[avod_score_mask]
        print('Predictions: ', len(prediction_scores), prediction_scores)

        final_predictions = np.copy(prediction_boxes_3d)

        prediction_objs = [
            box_3d_encoder.box_3d_to_object_label(
                prediction_box_3d,
                obj_type=dataset.classes[prediction_type_idx])
            for prediction_box_3d, prediction_type_idx
            in zip(final_predictions, prediction_types)
        ]

        ##############################
        # Ground Truth
        ##############################
        if dataset.has_labels:
            # Get ground truth labels
            easy_gt_objs, medium_gt_objs, \
                hard_gt_objs, all_gt_objs = \
                demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
        else:
            easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

        ##############################
        # 3D IoU
        ##############################
        if draw_ious_3d:
            # Convert to box_3d
            all_gt_boxes_3d = [
                box_3d_encoder.object_label_to_box_3d(gt_obj)
                for gt_obj in all_gt_objs
            ]
            pred_boxes_3d = [
                box_3d_encoder.object_label_to_box_3d(pred_obj)
                for pred_obj in prediction_objs
            ]
            max_ious_3d = demo_utils.get_max_ious_3d(all_gt_boxes_3d,
                                                     pred_boxes_3d)

        ##############################
        # Point Cloud
        ##############################
        image_path = dataset.get_rgb_image_path(sample_name)
        image = cv2.imread(image_path)
        print("***************")
        print(point_cloud_source)
        print(img_idx)
        print(image.shape)
        point_cloud = dataset.kitti_utils.get_point_cloud(
            point_cloud_source, img_idx, image_shape=image.shape)
        print("This is the shape of the point_cloud")
        print(point_cloud.shape)
        point_cloud = np.asarray(point_cloud)

        # Filter point cloud to extents
        area_extents = np.asarray([[-40, 40], [-5, 3], [0, 70]])
        bev_extents = area_extents[[0, 2]]

        points = point_cloud.T
        point_filter = obj_utils.get_point_filter(point_cloud, area_extents)
        points = points[point_filter]

        point_colours = vis_utils.project_img_to_point_cloud(
            points, image, dataset.calib_dir, img_idx)

        # Voxelize the point cloud for visualization
        voxel_grid = VoxelGrid()
        voxel_grid.voxelize(points, voxel_size=0.1,
                            create_leaf_layout=False)

        # Ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

        ##############################
        # Visualization
        ##############################
        # Create VtkVoxelGrid
        vtk_voxel_grid = VtkVoxelGrid()
        vtk_voxel_grid.set_voxels(voxel_grid)

        vtk_point_cloud = VtkPointCloud()
        vtk_point_cloud.set_points(points, point_colours)

        # Create VtkAxes
        vtk_axes = vtk.vtkAxesActor()
        vtk_axes.SetTotalLength(5, 5, 5)

        # Create VtkBoxes for proposal boxes
        vtk_proposal_boxes = VtkBoxes()
        vtk_proposal_boxes.set_line_width(proposals_line_width)
        vtk_proposal_boxes.set_objects(proposal_objs,
                                       COLOUR_SCHEME_PREDICTIONS)

        # Create VtkBoxes for prediction boxes
        vtk_prediction_boxes = VtkPyramidBoxes()
        vtk_prediction_boxes.set_line_width(predictions_line_width)
        vtk_prediction_boxes.set_objects(prediction_objs,
                                         COLOUR_SCHEME_PREDICTIONS,
                                         show_orientations)

        # Create VtkBoxes for ground truth
        vtk_hard_gt_boxes = VtkBoxes()
        vtk_medium_gt_boxes = VtkBoxes()
        vtk_easy_gt_boxes = VtkBoxes()
        vtk_all_gt_boxes = VtkBoxes()

        vtk_hard_gt_boxes.set_objects(hard_gt_objs,
                                      COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_medium_gt_boxes.set_objects(medium_gt_objs,
                                        COLOUR_SCHEME_PREDICTIONS,
                                        show_orientations)
        vtk_easy_gt_boxes.set_objects(easy_gt_objs,
                                      COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_all_gt_boxes.set_objects(all_gt_objs,
                                     VtkBoxes.COLOUR_SCHEME_KITTI,
                                     show_orientations)

        # Create VtkTextLabels for 3D ious, placed at the gt box positions
        vtk_text_labels = VtkTextLabels()

        if draw_ious_3d and len(all_gt_boxes_3d) > 0:
            gt_positions_3d = np.asarray(all_gt_boxes_3d)[:, 0:3]
            vtk_text_labels.set_text_labels(
                gt_positions_3d,
                ['{:0.3f}'.format(iou_3d) for iou_3d in max_ious_3d])

        # Create VtkGroundPlane; the slice planes are the ground plane
        # with its last coefficient offset by -0.2 and -2.0
        vtk_ground_plane = VtkGroundPlane()
        vtk_slice_bot_plane = VtkGroundPlane()
        vtk_slice_top_plane = VtkGroundPlane()

        vtk_ground_plane.set_plane(ground_plane, bev_extents)
        vtk_slice_bot_plane.set_plane(ground_plane + [0, 0, 0, -0.2],
                                      bev_extents)
        vtk_slice_top_plane.set_plane(ground_plane + [0, 0, 0, -2.0],
                                      bev_extents)

        # Create Voxel Grid Renderer in bottom half
        vtk_renderer = vtk.vtkRenderer()
        vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
        vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

        vtk_renderer.AddActor(vtk_proposal_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_prediction_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_text_labels.vtk_actor)

        # Add ground plane and slice planes
        vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_bot_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_top_plane.vtk_actor)

        vtk_renderer.AddActor(vtk_axes)
        vtk_renderer.SetBackground(0.2, 0.3, 0.4)

        # Set initial properties for some actors
        vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(3)
        vtk_proposal_boxes.vtk_actor.SetVisibility(0)
        vtk_voxel_grid.vtk_actor.SetVisibility(0)
        vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

        vtk_ground_plane.vtk_actor.SetVisibility(0)
        vtk_slice_bot_plane.vtk_actor.SetVisibility(0)
        vtk_slice_top_plane.vtk_actor.SetVisibility(0)
        vtk_ground_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_bot_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_top_plane.vtk_actor.GetProperty().SetOpacity(0.9)

        # Setup Camera
        current_cam = vtk_renderer.GetActiveCamera()
        current_cam.Pitch(140.0)
        current_cam.Roll(180.0)

        # Zooms out to fit all points on screen
        vtk_renderer.ResetCamera()
        # Zoom in slightly
        current_cam.Zoom(2)

        # Reset the clipping range to show all points
        vtk_renderer.ResetCameraClippingRange()

        # Setup Render Window
        vtk_render_window = vtk.vtkRenderWindow()
        vtk_render_window.SetWindowName(
            "Predictions: Step {}, Sample {}, Min Score {}".format(
                global_step,
                sample_name,
                avod_score_threshold,
            ))
        vtk_render_window.SetSize(900, 600)
        vtk_render_window.AddRenderer(vtk_renderer)

        # Setup custom interactor style, which handles mouse and key events
        vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
        vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

        # Add custom interactor to toggle actor visibilities
        custom_interactor = vis_utils.CameraInfoInteractorStyle([
            vtk_proposal_boxes.vtk_actor,
            vtk_prediction_boxes.vtk_actor,
            vtk_voxel_grid.vtk_actor,
            vtk_point_cloud.vtk_actor,

            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,

            vtk_ground_plane.vtk_actor,
            vtk_slice_bot_plane.vtk_actor,
            vtk_slice_top_plane.vtk_actor,
            vtk_text_labels.vtk_actor,
        ])

        vtk_render_window_interactor.SetInteractorStyle(custom_interactor)

        # Render in VTK
        vtk_render_window.Render()

        # Take a screenshot
        window_to_image_filter = vtk.vtkWindowToImageFilter()
        window_to_image_filter.SetInput(vtk_render_window)
        window_to_image_filter.Update()

        png_writer = vtk.vtkPNGWriter()
        file_name = img_out_dir + "/{}.png".format(sample_name)
        png_writer.SetFileName(file_name)
        png_writer.SetInputData(window_to_image_filter.GetOutput())
        png_writer.Write()

        print('Screenshot saved to ', file_name)

        # vtk_render_window_interactor.Start()  # Blocking
        vtk_render_window_interactor.Initialize()  # Non-Blocking
def main():
    """Displays the bird's eye view maps for a KITTI sample.

    Builds the KITTI validation dataset with a 'slices' BEV generator,
    creates the height and density maps for one sample (random unless
    img_idx is set in the options), optionally overlays the ground truth
    boxes, and shows the camera image and every map in its own OpenCV
    window until a key is pressed.

    Raises:
        ValueError: if the bev_generator option is not 'slices'.
    """
    ##############################
    # Options
    ##############################
    bev_generator = 'slices'

    slices_config = \
        """
        slices {
            height_lo: -0.2
            height_hi: 2.3
            num_slices: 5
        }
        """

    # Use None for a random image
    img_idx = None
    # img_idx = 142
    # img_idx = 191

    show_ground_truth = True  # Whether to overlay ground_truth boxes

    point_cloud_source = 'lidar'
    ##############################
    # End of Options
    ##############################

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)
    dataset_config = DatasetBuilder.merge_defaults(dataset_config)

    # Overwrite bev_generator
    if bev_generator == 'slices':
        text_format.Merge(slices_config,
                          dataset_config.kitti_utils_config.bev_generator)
    else:
        raise ValueError('Invalid bev_generator')

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    if img_idx is None:
        # Pick a random sample that belongs to the split. A random
        # position in the sample list is not itself a valid image id,
        # so the id is taken from the sample's name.
        sample_idx = random.randrange(dataset.num_samples)
        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)
    else:
        sample_name = "{:06}".format(img_idx)

    print('=== Showing BEV maps for image: {}.png ==='.format(sample_name))

    # Load image
    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = image.shape[0:2]

    kitti_utils = dataset.kitti_utils
    point_cloud = kitti_utils.get_point_cloud(
        point_cloud_source, img_idx, image_shape)
    ground_plane = kitti_utils.get_ground_plane(sample_name)
    bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

    height_maps = np.array(bev_images.get("height_maps"))
    density_map = np.array(bev_images.get("density_map"))

    # Stays None when the ground truth overlay is disabled
    box_points_norm = None
    if show_ground_truth:
        # Get projected boxes (labels are used unfiltered)
        obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx)

        label_boxes = np.array([
            box_3d_encoder.object_label_to_box_3d(label)
            for label in obj_labels
        ])

        _, box_points_norm = box_3d_projector.project_to_bev(
            label_boxes, [[-40, 40], [0, 70]])

    # Window layout: camera image on top, maps tiled below from left to
    # right
    rgb_img_size = (np.array((1242, 375)) * 0.75).astype(np.int16)
    img_x_start = 60
    img_y_start = 330

    img_x = img_x_start
    img_y = img_y_start
    img_w = 400
    img_h = 350
    img_titlebar_h = 20

    # Show images
    vis_utils.cv2_show_image("Image", image,
                             size_wh=rgb_img_size, location_xy=(img_x, 0))

    # Height maps
    for map_idx, height_map in enumerate(height_maps):
        height_map = draw_boxes(height_map, box_points_norm)
        vis_utils.cv2_show_image(
            "Height Map {}".format(map_idx), height_map,
            size_wh=(img_w, img_h), location_xy=(img_x, img_y))

        img_x += img_w
        # Wrap around to the next row once the next window would cross
        # x = 1920
        if (img_x + img_w) > 1920:
            img_x = img_x_start
            img_y += img_h + img_titlebar_h

    # Density map
    density_map = draw_boxes(density_map, box_points_norm)
    vis_utils.cv2_show_image(
        "Density Map", density_map,
        size_wh=(img_w, img_h), location_xy=(img_x, img_y))

    cv2.waitKey()
def load_samples(self, indices, sin_type=None, sin_level=None,
                 sin_input_name=None, gen_all_sin_inputs=False,
                 list_mask_2d=None):
    """Loads input-output data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded
        sin_type: Type of Single Input Noise (SIN) to apply. The value
            'lowres' additionally switches lidar loading to the
            sub-sampled reader (see below). Noise is only added when the
            value is in SINFields.VALID_SIN_TYPES.
        sin_level: Noise level, forwarded to get_stride_sub and to the
            SIN generators.
        sin_input_name: Name of the input the noise is applied to. Noise
            is only added when it is in SINFields.SIN_INPUT_NAMES;
            'lidar' together with sin_type 'lowres' selects the
            sub-sampled point cloud.
        gen_all_sin_inputs: If truthy, genSINtoAllInputs is applied to
            the image and the point cloud, and sin_type 'lowres' selects
            the sub-sampled point cloud.
        list_mask_2d: Optional sequence of 2D masks, indexed in the same
            order as `indices`; each entry is forwarded to the SIN
            generators as mask_2d. None or an empty sequence means no
            mask.

    Return:
        samples: a list of data sample dicts.
            NOTE: in 'train' mode with train_on_all_samples disabled, the
            first sample without anchors info makes this function return
            immediately with a one-element list holding only the sample
            name and the (empty) anchors info; samples loaded before it
            are not returned.
    """
    # Explicit None/length test: plain truthiness raises for numpy arrays
    has_masks = list_mask_2d is not None and len(list_mask_2d) > 0

    # Low resolution lidar is read with subsampling, either when lidar is
    # the targeted input or when noise is generated for all inputs
    use_lowres_lidar = (sin_type == 'lowres') and (
        sin_input_name == 'lidar' or gen_all_sin_inputs)

    sample_dicts = []
    for idx, sample_idx in enumerate(indices):
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        mask_2d = list_mask_2d[idx] if has_masks else None

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)

        else:
            obj_labels = None

            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        # Read lidar with subsampling (handled before other preprocessing)
        if use_lowres_lidar:
            stride_sub = get_stride_sub(sin_level)
            point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                              self.velo_dir, image_shape,
                                              stride_sub)
        else:
            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None for datasets without labels; there is
            # nothing to flip in that case
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        # Add Single Input Noise
        if (sin_input_name in SINFields.SIN_INPUT_NAMES) and (
                sin_type in SINFields.VALID_SIN_TYPES):
            image_input, point_cloud = genSINtoInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                sin_input_name=sin_input_name,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        # Add Input Noise to all
        if gen_all_sin_inputs:
            image_input, point_cloud = genSINtoAllInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset
                    # calculation should work, since the negative samples
                    # won't be regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        # Stack all height maps and the density map along the last axis
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def _calculate_anchors_info(self, all_anchor_boxes_3d, empty_anchor_filter,
                            gt_labels):
    """Calculates the anchor information for all non-empty anchors.

    The result has one row per non-empty anchor and
    self.mini_batch_utils.col_length columns, laid out according to the
    column indices of self.mini_batch_utils:
        [anchor index, max gt iou, (offsets), class_index]

    anchor index - index of the anchor in all_anchor_boxes_3d
    max gt iou - highest iou (2D or 3D, depending on rpn_iou_type) with
        any ground truth box
    offsets - encoded offsets to the ground truth with the highest iou,
        as returned by anchor_encoder.anchor_to_offset
    class_index - the class of that ground truth as an index
        (e.g. 0 or 1, for "Background" or "Car")

    Rows of anchors that do not overlap any ground truth keep zeros in
    the iou, offsets and class columns.

    Args:
        all_anchor_boxes_3d: list of anchors in box_3d format
            N x [x, y, z, l, w, h, ry]
        empty_anchor_filter: boolean mask of which anchors are non empty
        gt_labels: list of Object Label data format containing ground truth
            labels to generate positives/negatives from.

    Returns:
        ndarray of anchor info, one row per non-empty anchor

    Raises:
        Warning: if gt_labels is empty (raised as an exception).
        ValueError: if rpn_iou_type is neither '2d' nor '3d'.
    """
    # Check for ground truth objects
    if len(gt_labels) == 0:
        raise Warning("No valid ground truth label to generate anchors.")

    kitti_utils = self._dataset.kitti_utils
    mb_utils = self.mini_batch_utils

    # Filter empty anchors
    anchor_indices = np.where(empty_anchor_filter)[0]
    anchor_boxes_3d = all_anchor_boxes_3d[empty_anchor_filter]

    # Convert anchor_boxes_3d to anchor format
    anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Convert gt to boxes_3d -> anchors -> iou format
    gt_boxes_3d = np.asarray([
        box_3d_encoder.object_label_to_box_3d(gt_obj)
        for gt_obj in gt_labels
    ])
    gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                 ortho_rotate=True)

    # Pick the iou function and its inputs once, instead of per label
    rpn_iou_type = mb_utils.rpn_iou_type
    if rpn_iou_type == '2d':
        # Convert anchors to 2d iou format
        anchors_for_iou, _ = np.asarray(
            anchor_projector.project_to_bev(anchors,
                                            kitti_utils.bev_extents))
        gt_boxes_for_iou, _ = anchor_projector.project_to_bev(
            gt_anchors, kitti_utils.bev_extents)
        iou_fn = evaluation.two_d_iou

    elif rpn_iou_type == '3d':
        # Convert anchors to 3d iou format for calculation
        anchors_for_iou = box_3d_encoder.box_3d_to_3d_iou_format(
            anchor_boxes_3d)
        gt_boxes_for_iou = \
            box_3d_encoder.box_3d_to_3d_iou_format(gt_boxes_3d)
        iou_fn = evaluation.three_d_iou

    else:
        raise ValueError(
            'Invalid rpn_iou_type {}'.format(rpn_iou_type))

    # Initialize sample and offset lists
    num_anchors = len(anchor_boxes_3d)
    all_info = np.zeros((num_anchors, mb_utils.col_length))

    # Update anchor indices
    all_info[:, mb_utils.col_anchor_indices] = anchor_indices

    # For each of the labels, generate samples
    for gt_idx, (gt_obj, gt_box_3d) in enumerate(zip(gt_labels,
                                                     gt_boxes_3d)):
        # Get 2D or 3D IoU for every anchor
        ious = iou_fn(gt_boxes_for_iou[gt_idx], anchors_for_iou)

        # Only update indices with a higher iou than before
        update_indices = np.greater(ious, all_info[:, mb_utils.col_ious])

        # Get ious to update
        ious_to_update = ious[update_indices]

        # Calculate offsets of the updated anchors to this ground truth
        anchors_to_update = anchors[update_indices]
        gt_anchor = box_3d_encoder.box_3d_to_anchor(gt_box_3d,
                                                    ortho_rotate=True)
        offsets = anchor_encoder.anchor_to_offset(anchors_to_update,
                                                  gt_anchor)

        # Convert gt type to index
        class_idx = kitti_utils.class_str_to_index(gt_obj.type)

        # Update anchors info (indices already updated)
        # [index, iou, (offsets), class_index]
        all_info[update_indices, mb_utils.col_ious] = ious_to_update
        all_info[update_indices,
                 mb_utils.col_offsets_lo:mb_utils.col_offsets_hi] = offsets
        all_info[update_indices, mb_utils.col_class_idx] = class_idx

    return all_info
def main():
    """Check 3D label flipping against a flipped stereo p2 matrix.

    This demo runs through all samples in the trainval set, and checks
    that the 3D box projection of all 'Car', 'Van', 'Pedestrian', and
    'Cyclist' objects are in the correct flipped 2D location after
    applying modifications to the stereo p2 matrix.

    For every object whose original box projects into the image, the
    2D box obtained by flipping the original projection is compared with
    the projection of the flipped 3D label through the flipped p2 matrix.
    Objects with a maximum error above 5 px are printed. At the end the
    flip time (total time divided by the number of samples), the largest
    error and histograms of the errors are shown. Nothing is plotted when
    no box could be compared.
    """
    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL, use_defaults=True)

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    all_samples = dataset.sample_names

    all_pixel_errors = []
    all_max_pixel_errors = []

    total_flip_time = 0.0

    for sample_idx in range(dataset.num_samples):

        sys.stdout.write('\r{} / {}'.format(sample_idx,
                                            dataset.num_samples - 1))

        sample_name = all_samples[sample_idx]
        img_idx = int(sample_name)

        # Read the calibration of this frame
        frame_calibration_info = calib_utils.read_calibration(
            dataset.calib_dir, img_idx)

        # Load labels
        gt_labels = obj_utils.read_labels(dataset.label_dir, img_idx)
        gt_labels = dataset.kitti_utils.filter_labels(
            gt_labels, ['Car', 'Van', 'Pedestrian', 'Cyclist'])

        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        image_size = [image.shape[1], image.shape[0]]

        # Flip p2 matrix: mirror entry [0, 2] about the image width and
        # negate entry [0, 3]
        calib_p2 = frame_calibration_info.p2
        flipped_p2 = np.copy(calib_p2)
        flipped_p2[0, 2] = image.shape[1] - flipped_p2[0, 2]
        flipped_p2[0, 3] = -flipped_p2[0, 3]

        for obj in gt_labels:
            # Get original 2D bounding boxes
            orig_box_3d = box_3d_encoder.object_label_to_box_3d(obj)
            orig_bbox_2d = box_3d_projector.project_to_image_space(
                orig_box_3d, calib_p2, truncate=True, image_size=image_size)

            # Skip boxes outside image
            if orig_bbox_2d is None:
                continue

            orig_bbox_2d_flipped = flip_box_2d(orig_bbox_2d, image_size)

            # Do flipping; perf_counter is meant for timing short spans
            start_time = time.perf_counter()
            flipped_obj = kitti_aug.flip_label_in_3d_only(obj)
            flip_time = time.perf_counter() - start_time
            total_flip_time += flip_time

            box_3d_flipped = box_3d_encoder.object_label_to_box_3d(
                flipped_obj)
            new_bbox_2d_flipped = box_3d_projector.project_to_image_space(
                box_3d_flipped, flipped_p2, truncate=True,
                image_size=image_size)

            # The projection can return None (see the check above); report
            # it instead of failing on the subtraction below
            if new_bbox_2d_flipped is None:
                print(' Flipped box has no 2D projection', sample_idx)
                continue

            pixel_errors = new_bbox_2d_flipped - orig_bbox_2d_flipped
            max_pixel_error = np.amax(np.abs(pixel_errors))

            all_pixel_errors.append(pixel_errors)
            all_max_pixel_errors.append(max_pixel_error)

            if max_pixel_error > 5:
                print(' Error > 5px', sample_idx, max_pixel_error)
                print(np.round(orig_bbox_2d_flipped, 3),
                      np.round(new_bbox_2d_flipped, 3))

    # Total flip time divided by the number of samples (not objects)
    if dataset.num_samples > 0:
        print('Avg flip time:', total_flip_time / dataset.num_samples)

    # np.amax and the histograms need at least one compared box
    if not all_max_pixel_errors:
        print('No boxes were compared, nothing to plot')
        return

    # Convert to ndarrays
    all_pixel_errors = np.asarray(all_pixel_errors)
    all_max_pixel_errors = np.asarray(all_max_pixel_errors)

    # Print max values
    print(np.amax(all_max_pixel_errors))

    # Plot pixel errors
    fig, axes = plt.subplots(nrows=3, ncols=1)
    ax0, ax1, ax2 = axes.flatten()

    ax0.hist(all_pixel_errors[:, 0], 50, histtype='bar', facecolor='green')
    ax1.hist(all_pixel_errors[:, 2], 50, histtype='bar', facecolor='green')
    ax2.hist(all_max_pixel_errors, 50, histtype='bar', facecolor='green')

    plt.show()