def test_box_3d_to_anchor_projected(self):
    """Verify the projected anchor extents when ortho_rotate=False.

    Boxes with l=4 and w=5 are rotated in 30 degree steps over a full turn.
    The expected x / z extents of each resulting anchor are those of the
    rotated footprint: 4|cos| + 5|sin| along x and 4|sin| + 5|cos| along z.
    """
    angles = np.arange(0, 2 * np.pi, np.pi / 6)

    # One box per heading; only ry differs between rows
    input_boxes = np.asarray(
        [[1, 2, 3, 4, 5, 6, angle] for angle in angles],
        dtype=np.float64)

    abs_cos = np.abs(np.cos(angles))
    abs_sin = np.abs(np.sin(angles))
    extents_x = 4 * abs_cos + 5 * abs_sin
    extents_z = 4 * abs_sin + 5 * abs_cos

    # Expected rows keep the centroid and the height (6) untouched
    wanted = np.asarray(
        [[1, 2, 3, dim_x, 6, dim_z]
         for dim_x, dim_z in zip(extents_x, extents_z)],
        np.float64)

    result = box_3d_encoder.box_3d_to_anchor(input_boxes,
                                             ortho_rotate=False)

    np.testing.assert_allclose(result, wanted)
def test_box_3d_to_anchor_180_270(self):
    """Check the anchors produced for headings of 180 and 270 degrees."""
    half_turn_box = [1, 2, 3, 4, 5, 6, np.pi]
    three_quarter_turn_box = [1, 2, 3, 4, 5, 6, 3 * np.pi / 2]
    input_boxes = np.asarray([half_turn_box, three_quarter_turn_box],
                             dtype=np.float64)

    # The second row has its x / z extents exchanged relative to the first
    wanted = np.asarray([[1, 2, 3, 4, 6, 5],
                         [1, 2, 3, 5, 6, 4]],
                        dtype=np.float64)

    result = box_3d_encoder.box_3d_to_anchor(input_boxes)

    np.testing.assert_allclose(result, wanted)
def _keep_near_anchors(self, all_anchor_boxes_3d, gt_labels, dist_thres=10):
    """Flag anchors whose centroid lies close to any ground-truth centroid.

    Args:
        all_anchor_boxes_3d: ndarray of anchors in box_3d format, one row
            per anchor.
        gt_labels: sequence of ground-truth object labels; each one is
            converted with box_3d_encoder.object_label_to_box_3d.
        dist_thres: an anchor is kept when the Euclidean distance between
            its centroid and a ground-truth centroid is strictly below
            this value.

    Returns:
        Boolean ndarray with one entry per anchor, True for anchors that
        are near at least one ground-truth object. All False when there is
        no ground truth.
    """
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24
    keep = np.zeros(all_anchor_boxes_3d.shape[0], dtype=bool)

    # Without ground truth no anchor can be "near" anything
    if len(gt_labels) == 0:
        return keep

    # Convert anchor_boxes_3d to anchor format
    anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)

    # Convert gt to boxes_3d -> anchors
    gt_boxes_3d = np.asarray(
        [box_3d_encoder.object_label_to_box_3d(gt_obj)
         for gt_obj in gt_labels])
    gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                 ortho_rotate=True)

    anchor_centroids = anchors[:, :3]
    for gt_anchor in gt_anchors:
        # Distance of every anchor centroid to this ground-truth centroid
        dist = np.linalg.norm(anchor_centroids - gt_anchor[:3], axis=1)
        keep[dist < dist_thres] = True

    return keep
def test_box_3d_to_anchor(self):
    """Check box_3d -> anchor conversion for headings of 0 and 90 degrees."""
    # Each input row is a box_3d: [x, y, z, l, w, h, ry]
    input_rows = [
        [1, 2, 3, 4, 5, 6, 0],
        [0, 0, 0, 1, 2, 3, 0],
        [0, 0, 0, 1, 2, 3, np.pi / 2],
    ]
    input_boxes = np.asarray(input_rows, dtype=np.float64)

    # Each expected row is an anchor: [x, y, z, dim_x, dim_y, dim_z]
    wanted_rows = [
        [1, 2, 3, 4, 6, 5],
        [0, 0, 0, 1, 3, 2],
        [0, 0, 0, 2, 3, 1],
    ]
    wanted = np.asarray(wanted_rows, dtype=np.float64)

    result = box_3d_encoder.box_3d_to_anchor(input_boxes)

    np.testing.assert_allclose(result, wanted)
def test_box_3d_to_anchor_rotated(self):
    """Check that ortho_rotate=True snaps headings to the nearest 90 degrees.

    Boxes at ry = 144 and 288 degrees must produce the same anchors as
    boxes at ry = 180 and 270 degrees, with unchanged dimensions.
    """
    heading_144_deg = np.pi * 4 / 5
    heading_288_deg = 8 * np.pi / 5
    input_boxes = np.asarray([[1, 2, 3, 4, 5, 6, heading_144_deg],
                              [1, 2, 3, 4, 5, 6, heading_288_deg]],
                             dtype=np.float64)

    wanted = np.asarray([[1, 2, 3, 4, 6, 5],
                         [1, 2, 3, 5, 6, 4]],
                        dtype=np.float64)

    result = box_3d_encoder.box_3d_to_anchor(input_boxes,
                                             ortho_rotate=True)

    np.testing.assert_allclose(result, wanted)
def load_samples(self, indices):
    """Load the input / label data for a set of samples.

    Should only be called when a particular sample dict is required.
    Otherwise, samples should be provided by the next_batch function.

    Point cloud loading, BEV map creation and flip augmentation are not
    performed by this loader; only the left / right images, labels, ground
    plane and the p2 / p3 calibration matrices are returned.

    Args:
        indices: A list of sample indices into self.sample_list to be
            loaded.

    Returns:
        A list of sample dicts, one per index. NOTE: when labels exist, the
        split is 'train', train_on_all_samples is off and a sample has empty
        anchors info, the function returns immediately with a one-element
        list holding only that sample's name and anchors info; samples
        loaded before it are discarded.

    Raises:
        IOError: if the left or right image of a sample cannot be read.
    """

    def read_rgb(image_path):
        # cv2.imread returns None instead of raising on failure; fail here
        # with the offending path rather than later with a TypeError
        bgr_image = cv2.imread(image_path)
        if bgr_image is None:
            raise IOError('Could not read image: {}'.format(image_path))
        # BGR -> RGB
        return bgr_image[..., ::-1]

    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            label_score_2d_path = self.get_score_2d_path(sample_name)
            with open(label_score_2d_path, 'rb') as score_file:
                label_score_2d = np.load(score_file)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            obj_labels = None

            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)
            label_score_2d = np.zeros((12, 39, 2))

        img_idx = int(sample_name)

        # Load left and right images
        image_input = read_rgb(self.get_rgb_image_path(sample_name))
        image_r_input = read_rgb(self.get_rgb_image_r_path(sample_name))

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, self.planes_dir)

        # Get calibration; parse the file once and take both matrices
        stereo_calib = calib_utils.read_calibration(self.calib_dir, img_idx)
        stereo_calib_p2 = stereo_calib.p2
        stereo_calib_p3 = stereo_calib.p3

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])
            image_r_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_r_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray(
                [box_3d_encoder.object_label_to_box_3d(obj_label)
                 for obj_label in obj_labels])

            label_classes = np.asarray(
                [self.kitti_utils.class_str_to_index(obj_label.type)
                 for obj_label in obj_labels],
                dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))

                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_LABEL_SCORE_2D: label_score_2d,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_IMAGE_R_INPUT: image_r_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
            constants.KEY_STEREO_CALIB_P3: stereo_calib_p3,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.

    The checkpoint name is read from the first command line argument; the
    script exits with a usage message when it is missing.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: {} <checkpoint_name>'.format(sys.argv[0]))

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    checkpoint_name = sys.argv[1]  # e.g. 'pyramid_cars_with_aug_example'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        if not steps:
            sys.exit('No checkpoint steps found in {}'.format(
                proposals_and_scores_dir))
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)
        os.makedirs(prop_out_dir, exist_ok=True)
        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)
        os.makedirs(overlaid_out_dir, exist_ok=True)
        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)
        os.makedirs(pred_out_dir, exist_ok=True)
        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1,
                             dataset.num_samples,
                             avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            # Columns 0-6 hold the box_3d, column 7 the score
            proposals_and_scores = _load_score_rows(proposals_file_path, 8)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            # Columns 0-6 hold the box_3d, 7 the score, 8 the class index
            predictions_and_scores = _load_score_rows(
                predictions_file_path, 9)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # Only mask when the file contained any predictions
            if len(prediction_boxes_3d) > 0:
                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        # NOTE: the returned boxes are not used below; the call is kept
        # unchanged
        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        # Only the image size is needed, so release the file handle at once
        image_path = dataset.get_rgb_image_path(sample_name)
        with Image.open(image_path) as image:
            image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                    img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs,
                           calib_p2,
                           num_of_proposals,
                           proposal_objs,
                           proposal_boxes,
                           prop_2d_axes,
                           prop_3d_axes,
                           draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for box_3d in prediction_boxes_3d:
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d[0:7], calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                # pred_out_dir only exists when predictions are drawn
                # separately
                if save_empty_images and draw_predictions_separate:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                if draw_overlaid:
                    # In overlaid mode the proposal figure is still open
                    plt.close(prop_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 prop_2d_axes,
                                 prop_3d_axes,
                                 draw_score,
                                 draw_iou,
                                 gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs,
                                     calib_p2,
                                     num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices,
                                     final_boxes_2d,
                                     pred_2d_axes,
                                     pred_3d_axes,
                                     draw_score,
                                     draw_iou,
                                     gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs,
                                        calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices,
                                        final_boxes_2d,
                                        pred_3d_axes,
                                        draw_score,
                                        draw_iou,
                                        gt_classes,
                                        draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')


def _load_score_rows(file_path, min_cols):
    """Load a numeric text file as a 2-D array, one box per row.

    np.loadtxt collapses a single-row file to a 1-D array and returns an
    empty array for an empty file; both break the column slicing done in
    main(). This always returns a 2-D array, using shape (0, min_cols) for
    an empty file.
    """
    rows = np.loadtxt(file_path, ndmin=2)
    if rows.size == 0:
        return np.zeros((0, min_cols))
    return rows
def _fill_anchor_pl_inputs(self, anchors_info, ground_plane, image_shape,
                           stereo_calib_p2, sample_name, sample_augs):
    """Fills anchor placeholder inputs with corresponding data.

    Generates the anchor grid for every dataset class, selects the anchors
    to use (from anchors_info, or by filtering empty anchors with a sliced
    2D voxel grid), projects them to BEV and image space and stores the
    results in self._placeholder_inputs. Also sets self._bev_anchors_norm
    and self._img_anchors_norm.

    Args:
        anchors_info: anchor info from mini_batch_utils, unpacked as
            (anchor_indices, anchors_ious, anchor_offsets, anchor_classes),
            or an empty value when the sample has none
        ground_plane: ground plane coefficients
        image_shape: image shape (h, w), used for projecting anchors
        stereo_calib_p2: calibration matrix passed to the image space
            projection
        sample_name: name of the sample, e.g. "000001"
        sample_augs: list of sample augmentations

    Raises:
        ValueError: if a train / val sample has no anchors info and the
            corresponding "all samples" flag is off, so there are no
            anchors to use; or on an unexpected run mode with non-empty
            anchor info.
    """

    def generate_grid(class_idx):
        # Anchor grid for one class over the configured area
        return self._anchor_generator.generate(
            area_3d=self._area_extents,
            anchor_3d_sizes=self._cluster_sizes[class_idx],
            anchor_stride=self._anchor_strides[class_idx],
            ground_plane=ground_plane)

    # Create anchors for each class
    num_classes = len(self.dataset.classes)
    if num_classes > 1:
        all_anchor_boxes_3d = np.concatenate(
            [generate_grid(class_idx) for class_idx in range(num_classes)])
    else:
        # Don't loop for a single class
        all_anchor_boxes_3d = generate_grid(0)

    # Anchor info stays empty unless it is read from anchors_info below
    anchors_ious = []
    anchor_offsets = []
    anchor_classes = []
    use_anchors_info = False

    run_mode = self._train_val_test
    if run_mode in ['train', 'val']:
        if anchors_info:
            # Read in anchor info during training / validation
            anchor_indices, anchors_ious, anchor_offsets, \
                anchor_classes = anchors_info
            anchor_boxes_3d_to_use = all_anchor_boxes_3d[anchor_indices]
            use_anchors_info = True
        else:
            train_cond = (run_mode == "train" and
                          self._train_on_all_samples)
            eval_cond = (run_mode == "val" and
                         self._eval_all_samples)
            if not (train_cond or eval_cond):
                # Previously this fell through and failed later with an
                # UnboundLocalError on anchor_boxes_3d_to_use
                raise ValueError(
                    'No anchors info for sample {} in run mode {}'.format(
                        sample_name, run_mode))

    if not use_anchors_info:
        # During testing, or train / val with no anchor info, manually
        # filter empty anchors
        # TODO: share voxel_grid_2d with BEV generation if possible
        voxel_grid_2d = \
            self.dataset.kitti_utils.create_sliced_voxel_grid_2d(
                sample_name, self.dataset.bev_source,
                image_shape=image_shape)

        # Convert to anchors and filter
        anchors_to_use = box_3d_encoder.box_3d_to_anchor(
            all_anchor_boxes_3d)
        empty_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors_to_use, voxel_grid_2d, density_threshold=1)

        anchor_boxes_3d_to_use = all_anchor_boxes_3d[empty_filter]

    # Convert lists to ndarrays
    anchor_boxes_3d_to_use = np.asarray(anchor_boxes_3d_to_use)
    anchors_ious = np.asarray(anchors_ious)
    anchor_offsets = np.asarray(anchor_offsets)
    anchor_classes = np.asarray(anchor_classes)

    # Flip anchors and centroid x offsets for augmented samples
    if kitti_aug.AUG_FLIPPING in sample_augs:
        anchor_boxes_3d_to_use = kitti_aug.flip_boxes_3d(
            anchor_boxes_3d_to_use, flip_ry=False)
        if use_anchors_info:
            # NOTE(review): np.asarray does not copy an ndarray, so if
            # anchors_info holds an ndarray this negation also changes the
            # caller's offsets - confirm no caller reuses them.
            anchor_offsets[:, 0] = -anchor_offsets[:, 0]

    # Convert to anchors
    anchors_to_use = box_3d_encoder.box_3d_to_anchor(
        anchor_boxes_3d_to_use)
    num_anchors = len(anchors_to_use)

    # Project anchors into bev
    bev_anchors, bev_anchors_norm = anchor_projector.project_to_bev(
        anchors_to_use, self._bev_extents)

    # Project box_3d anchors into image space
    img_anchors, img_anchors_norm = \
        anchor_projector.project_to_image_space(
            anchors_to_use, stereo_calib_p2, image_shape)

    # Reorder into [y1, x1, y2, x2] for tf.crop_and_resize op
    self._bev_anchors_norm = bev_anchors_norm[:, [1, 0, 3, 2]]
    self._img_anchors_norm = img_anchors_norm[:, [1, 0, 3, 2]]

    # Fill in placeholder inputs
    self._placeholder_inputs[self.PL_ANCHORS] = anchors_to_use

    # If we are in train/validation mode, and the anchor infos
    # are not empty, store them. Checking for just anchors_ious
    # to be non-empty should be enough.
    if run_mode in ['train', 'val'] and len(anchors_ious) > 0:
        self._placeholder_inputs[self.PL_ANCHOR_IOUS] = anchors_ious
        self._placeholder_inputs[self.PL_ANCHOR_OFFSETS] = anchor_offsets
        self._placeholder_inputs[self.PL_ANCHOR_CLASSES] = anchor_classes

    # During test, or val when there is no anchor info
    elif run_mode in ['test'] or len(anchors_ious) == 0:
        # During testing, or validation with no gt, fill these in with 0s
        self._placeholder_inputs[self.PL_ANCHOR_IOUS] = \
            np.zeros(num_anchors)
        self._placeholder_inputs[self.PL_ANCHOR_OFFSETS] = \
            np.zeros([num_anchors, 6])
        self._placeholder_inputs[self.PL_ANCHOR_CLASSES] = \
            np.zeros(num_anchors)
    else:
        raise ValueError(
            'Got run mode {}, and non-empty anchor info'.format(
                run_mode))

    self._placeholder_inputs[self.PL_BEV_ANCHORS] = bev_anchors
    self._placeholder_inputs[self.PL_BEV_ANCHORS_NORM] = \
        self._bev_anchors_norm
    self._placeholder_inputs[self.PL_IMG_ANCHORS] = img_anchors
    self._placeholder_inputs[self.PL_IMG_ANCHORS_NORM] = \
        self._img_anchors_norm
def _calculate_anchors_info(self, all_anchor_boxes_3d, empty_anchor_filter,
                            gt_labels):
    """Calculates the anchor information for every non-empty anchor.

    The result has one row per non-empty anchor and
    mini_batch_utils.col_length columns, laid out as
        [anchor_index, iou, (offsets), class_index]

    anchor_index - index of the anchor in all_anchor_boxes_3d
    iou - highest 2D or 3D iou (per mini_batch_utils.rpn_iou_type) with
        any ground truth box
    offsets - encoded offsets to the ground truth with the highest iou,
        [dx, dy, dz, d_dimx, d_dimy, d_dimz]
    class_index - the class of that ground truth as an index
        (e.g. 0 or 1, for "Background" or "Car")

    Rows of anchors that overlap no ground truth keep zeros in every
    column but the anchor index.

    Args:
        all_anchor_boxes_3d: list of anchors in box_3d format
            N x [x, y, z, l, w, h, ry]
        empty_anchor_filter: boolean mask of which anchors are non empty
        gt_labels: list of Object Label data format containing ground truth
            labels to generate positives/negatives from.

    Returns:
        ndarray of anchor info

    Raises:
        Warning: if gt_labels is empty.
        ValueError: if mini_batch_utils.rpn_iou_type is not '2d' or '3d'.
    """
    # Check for ground truth objects
    if len(gt_labels) == 0:
        raise Warning("No valid ground truth label to generate anchors.")

    kitti_utils = self._dataset.kitti_utils
    mb_utils = self.mini_batch_utils

    # Filter empty anchors
    anchor_indices = np.where(empty_anchor_filter)[0]
    anchor_boxes_3d = all_anchor_boxes_3d[empty_anchor_filter]

    # Convert anchor_boxes_3d to anchor format
    anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Convert gt to boxes_3d -> anchors -> iou format
    gt_boxes_3d = np.asarray(
        [box_3d_encoder.object_label_to_box_3d(gt_obj)
         for gt_obj in gt_labels])
    gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                 ortho_rotate=True)

    rpn_iou_type = mb_utils.rpn_iou_type
    if rpn_iou_type == '2d':
        # Convert anchors to 2d iou format
        anchors_for_iou, _ = anchor_projector.project_to_bev(
            anchors, kitti_utils.bev_extents)
        anchors_for_iou = np.asarray(anchors_for_iou)

        gt_boxes_for_iou, _ = anchor_projector.project_to_bev(
            gt_anchors, kitti_utils.bev_extents)
        iou_fn = evaluation.two_d_iou

    elif rpn_iou_type == '3d':
        # Convert anchors to 3d iou format for calculation
        anchors_for_iou = box_3d_encoder.box_3d_to_3d_iou_format(
            anchor_boxes_3d)
        gt_boxes_for_iou = \
            box_3d_encoder.box_3d_to_3d_iou_format(gt_boxes_3d)
        iou_fn = evaluation.three_d_iou
    else:
        # The value was previously passed as a second exception argument
        # and never substituted into the message
        raise ValueError('Invalid rpn_iou_type {}'.format(rpn_iou_type))

    # Initialize sample and offset lists
    num_anchors = len(anchor_boxes_3d)
    all_info = np.zeros((num_anchors, mb_utils.col_length))

    # Update anchor indices
    all_info[:, mb_utils.col_anchor_indices] = anchor_indices

    # For each of the labels, generate samples
    for gt_idx, gt_obj in enumerate(gt_labels):
        gt_box_3d = gt_boxes_3d[gt_idx]

        # Get 2D or 3D IoU for every anchor
        ious = iou_fn(gt_boxes_for_iou[gt_idx], anchors_for_iou)

        # Only update indices with a higher iou than before
        update_indices = np.greater(ious, all_info[:, mb_utils.col_ious])

        # Get ious to update
        ious_to_update = ious[update_indices]

        # Calculate offsets of the updated anchors to this ground truth
        anchors_to_update = anchors[update_indices]
        gt_anchor = box_3d_encoder.box_3d_to_anchor(gt_box_3d,
                                                    ortho_rotate=True)
        offsets = anchor_encoder.anchor_to_offset(anchors_to_update,
                                                  gt_anchor)

        # Convert gt type to index
        class_idx = kitti_utils.class_str_to_index(gt_obj.type)

        # Update anchors info (indices already updated)
        # [index, iou, (offsets), class_index]
        all_info[update_indices, mb_utils.col_ious] = ious_to_update
        all_info[update_indices,
                 mb_utils.col_offsets_lo:
                 mb_utils.col_offsets_hi] = offsets
        all_info[update_indices, mb_utils.col_class_idx] = class_idx

    return all_info
def load_samples_from_file(self, image_path, lidar_path, calib_dir):
    """Builds a single sample dict from explicit input file paths.

    No ground truth is read here, so the label entries of the returned
    sample are zero-filled placeholders and the anchors info is empty.
    The sample name and augmentations are taken from the first entry of
    self.sample_list.

    Args:
        image_path: path of the image to load
        lidar_path: path passed to kitti_utils.get_point_cloud_from_file
        calib_dir: path passed to obj_utils.get_road_plane_from_file and
            calib_utils.read_raw_calibration

    Returns:
        A list containing one sample dict.

    Raises:
        IOError: if the image cannot be read.
    """
    sample = self.sample_list[0]
    sample_name = sample.name

    # No labels are available for a sample loaded from raw files
    anchors_info = []
    label_anchors = np.zeros((1, 6))
    label_boxes_3d = np.zeros((1, 7))
    label_classes = np.zeros(1)

    # Load image (BGR -> RGB)
    cv_bgr_image = cv2.imread(image_path)
    if cv_bgr_image is None:
        # cv2.imread returns None instead of raising on failure
        raise IOError('Could not read image: {}'.format(image_path))
    image_input = cv_bgr_image[..., ::-1]
    image_shape = image_input.shape[0:2]

    # Get ground plane
    ground_plane = obj_utils.get_road_plane_from_file(calib_dir)

    # Get calibration
    stereo_calib = calib_utils.read_raw_calibration(calib_dir)
    stereo_calib_p2 = stereo_calib.p2

    point_cloud = self.kitti_utils.get_point_cloud_from_file(
        self.bev_source, stereo_calib, lidar_path, image_shape)

    # Augmentation (Flipping)
    # There are no object labels to flip; iterating over the missing
    # labels used to raise a TypeError here.
    if kitti_aug.AUG_FLIPPING in sample.augs:
        image_input = kitti_aug.flip_image(image_input)
        point_cloud = kitti_aug.flip_point_cloud(point_cloud)
        ground_plane = kitti_aug.flip_ground_plane(ground_plane)
        stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
            stereo_calib_p2, image_shape)

    # Augmentation (Image Jitter)
    if kitti_aug.AUG_PCA_JITTER in sample.augs:
        image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
            image_input[:, :, 0:3])

    # Create BEV maps
    bev_images = self.kitti_utils.create_bev_maps(point_cloud, ground_plane)

    height_maps = bev_images.get('height_maps')
    density_map = bev_images.get('density_map')
    bev_input = np.dstack((*height_maps, density_map))

    sample_dict = {
        constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
        constants.KEY_LABEL_ANCHORS: label_anchors,
        constants.KEY_LABEL_CLASSES: label_classes,

        constants.KEY_IMAGE_INPUT: image_input,
        constants.KEY_BEV_INPUT: bev_input,

        constants.KEY_ANCHORS_INFO: anchors_info,

        constants.KEY_POINT_CLOUD: point_cloud,
        constants.KEY_GROUND_PLANE: ground_plane,
        constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

        constants.KEY_SAMPLE_NAME: sample_name,
        constants.KEY_SAMPLE_AUGS: sample.augs
    }

    return [sample_dict]
def np_box_3d_to_box_8c(box_3d):
    """Computes the 3D bounding box corner positions from box_3d format.

    The corner order is not preserved: the box is first snapped to the
    nearest 90 degree heading (via its anchor) and the corners are then
    rotated by the remaining angle. This helps in calculating the closest
    corner to corner when comparing the corners to the ground-truth boxes.

    Args:
        box_3d: ndarray of size (7,) representing box_3d in the format
            [x, y, z, l, w, h, ry]

    Returns:
        corners_3d: An ndarray of shape (3 x 8) representing the box as
            corners in following format -> [[x1,...,x8],[y1...,y8],
            [z1,...,z8]].
    """
    format_checker.check_box_3d_format(box_3d)

    # box_3d_to_anchor is vectorized, so take the single resulting row
    ortho_anchor = box_3d_encoder.box_3d_to_anchor(
        box_3d, ortho_rotate=True)[0]

    center_x, center_y, center_z = \
        ortho_anchor[0], ortho_anchor[1], ortho_anchor[2]
    size_x, size_y, size_z = \
        ortho_anchor[3], ortho_anchor[4], ortho_anchor[5]

    half_x = size_x / 2
    half_z = size_z / 2

    # Corner offsets around the centroid: the first four corners lie at
    # y offset 0, the last four at y offset -size_y
    x_offsets = np.array([half_x, half_x, -half_x, -half_x] * 2)
    y_offsets = np.array([0.0] * 4 + [-size_y] * 4)
    z_offsets = np.array([half_z, -half_z, -half_z, half_z] * 2)

    # Residual rotation between the actual heading and the nearest
    # multiple of 90 degrees
    heading = box_3d[6]
    quarter_turn = np.pi / 2
    snapped_heading = np.round(heading / quarter_turn) * quarter_turn
    residual = heading - snapped_heading

    cos_r = np.cos(residual)
    sin_r = np.sin(residual)

    # 3 x 4 transform: rotation about y plus translation to the centroid
    transform = np.array([[cos_r, 0, sin_r, center_x],
                          [0, 1, 0, center_y],
                          [-sin_r, 0, cos_r, center_z]])

    # Homogeneous corner coordinates (4 x 8) so the translation applies
    homogeneous_corners = np.vstack(
        [x_offsets, y_offsets, z_offsets, np.ones(x_offsets.shape)])

    corners = transform.dot(homogeneous_corners)

    # Keep the three coordinate rows
    return corners[0:3]
def preprocess(self, indices):
    """Preprocesses anchor info and saves info to files

    For every requested sample, anchors are generated for each dataset
    class, an empty-anchor filter is computed from a sliced 2D voxel grid,
    and the anchors info calculated against the filtered ground truth is
    saved. Samples that already have a file are skipped, and samples with
    no ground truth left after filtering get an empty file.

    Args:
        indices (int array): sample indices to process.
            If None, processes all samples
    """
    # Get anchor stride for class
    anchor_strides = self._anchor_strides

    dataset = self._dataset
    dataset_utils = self._dataset.kitti_utils
    classes_name = dataset.classes_name

    # Make folder if it doesn't exist yet
    output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                     anchor_strides,
                                                     sample_name=None)
    os.makedirs(output_dir, exist_ok=True)

    # Get clusters for class
    all_clusters_sizes, _ = dataset.get_cluster_info()

    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Load indices of data_split
    all_samples = dataset.sample_list

    if indices is None:
        indices = np.arange(len(all_samples))
    num_samples = len(indices)

    # For each image in the dataset, save info on the anchors
    for sample_idx in indices:
        # Get image name for given cluster
        sample_name = all_samples[sample_idx].name
        img_idx = int(sample_name)

        # Check for existing files and skip to the next
        if self._check_for_existing(classes_name, anchor_strides,
                                    sample_name):
            print("{} / {}: Sample already preprocessed".format(
                sample_idx + 1, num_samples))
            continue

        # Get ground truth and filter based on difficulty
        ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                  img_idx)

        # Filter objects to dataset classes
        filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
        filtered_gt_list = np.asarray(filtered_gt_list)

        # Filtering by class has no valid ground truth, skip this image
        if len(filtered_gt_list) == 0:
            print("{} / {} No {}s for sample {} "
                  "(Ground Truth Filter)".format(
                      sample_idx + 1, num_samples,
                      classes_name, sample_name))

            # Output an empty file and move on to the next image.
            self._save_to_file(classes_name, anchor_strides, sample_name)
            continue

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                dataset.planes_dir)

        # Only the image size is needed. Close the file right away so a
        # long preprocessing run does not accumulate open file handles.
        with Image.open(dataset.get_rgb_image_path(sample_name)) as image:
            image_shape = [image.size[1], image.size[0]]

        # Generate sliced 2D voxel grid for filtering
        vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
            sample_name,
            source=dataset.bev_source,
            image_shape=image_shape)

        # List for merging all anchors
        all_anchor_boxes_3d = []

        # Create anchors for each class
        for class_idx in range(len(dataset.classes)):
            # Generate anchors for all classes
            grid_anchor_boxes_3d = anchor_generator.generate(
                area_3d=self._area_extents,
                anchor_3d_sizes=all_clusters_sizes[class_idx],
                anchor_stride=self._anchor_strides[class_idx],
                ground_plane=ground_plane)

            all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

        # Filter empty anchors
        all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
        anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
        empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
            anchors, vx_grid_2d, self._density_threshold)

        # Calculate anchor info
        anchors_info = self._calculate_anchors_info(
            all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list)

        anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

        valid_iou_indices = np.where(anchor_ious > 0.0)[0]

        print("{} / {}:"
              "{:>6} anchors, "
              "{:>6} iou > 0.0, "
              "for {:>3} {}(s) for sample {}".format(
                  sample_idx + 1, num_samples,
                  len(anchors_info),
                  len(valid_iou_indices),
                  len(filtered_gt_list), classes_name, sample_name
              ))

        # Save anchors info
        self._save_to_file(classes_name, anchor_strides,
                           sample_name, anchors_info)
def load_samples(self, indices, sin_type=None, sin_level=None,
                 sin_input_name=None, gen_all_sin_inputs=False,
                 list_mask_2d=None):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded
        sin_type: noise type. The value 'lowres' makes the point cloud be
            read through get_point_cloud_sub (when sin_input_name is
            'lidar' or gen_all_sin_inputs is set); values found in
            SINFields.VALID_SIN_TYPES enable genSINtoInputs.
        sin_level: noise level, forwarded to get_stride_sub and to the
            genSIN* helpers.
        sin_input_name: name of the single input to perturb; must be in
            SINFields.SIN_INPUT_NAMES for genSINtoInputs to be applied.
        gen_all_sin_inputs: if truthy, genSINtoAllInputs is applied to
            the image and the point cloud.
        list_mask_2d: optional sequence indexed by the position of the
            sample within `indices`; the entry is forwarded as `mask_2d`.

    Return:
        samples: a list of data sample dicts. If a training sample has
            empty anchors info (and train_on_all_samples is off), a
            single-element list holding only the sample name and the
            anchors info is returned immediately instead.
    """
    sample_dicts = []
    for idx, sample_idx in enumerate(indices):
        sample = self.sample_list[sample_idx]
        sample_name = sample.name
        mask_2d = list_mask_2d[idx] if list_mask_2d else None

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            obj_labels = None
            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib_p2 = calib_utils.read_calibration(
            self.calib_dir, int(sample_name)).p2

        # Read lidar with subsampling (handled before other preprocessing)
        if sin_type == 'lowres' and (sin_input_name == 'lidar'
                                     or gen_all_sin_inputs):
            stride_sub = get_stride_sub(sin_level)
            point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                              self.velo_dir, image_shape,
                                              stride_sub)
        else:
            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None when the dataset has no labels; there is
            # nothing to flip in that case.
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        # Add Single Input Noise
        if (sin_input_name in SINFields.SIN_INPUT_NAMES) and (
                sin_type in SINFields.VALID_SIN_TYPES):
            image_input, point_cloud = genSINtoInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                sin_input_name=sin_input_name,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        # Add Input Noise to all
        if gen_all_sin_inputs:
            image_input, point_cloud = genSINtoAllInputs(
                image_input, point_cloud,
                sin_type=sin_type,
                sin_level=sin_level,
                mask_2d=mask_2d,
                frame_calib_p2=stereo_calib_p2)

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset
                    # calculation should work, since the negative samples
                    # won't be regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts
def test_get_empty_anchor_filter_in_2d(self):
    """Checks get_empty_anchor_filter against a grid with two occupied
    voxels, for unit boxes and for elongated boxes at 0 and 90 degrees."""
    # Extents handed to the voxel grid
    extents = [(0., 2.), (-1., 0.), (0., 2.)]

    # Creates a voxel grid in following format at y = bin (-1.5, -0.5]
    # [ ][ ][ ][ ]
    # [ ][ ][x][ ]
    # [ ][ ][ ][ ]
    # [ ][ ][x][ ]
    occupied_pts = np.array([[0.51, -0.5, 1.1], [1.51, -0.5, 1.1]])

    voxel_grid = VoxelGrid()
    voxel_grid.voxelize(occupied_pts, 0.5, extents=extents)

    quarter_turn = np.pi / 2.

    def check_filter(boxes, expected):
        # Convert to anchors, run the filter and compare element-wise
        anchors = box_3d_encoder.box_3d_to_anchor(boxes)
        gen_filter = anchor_filter.get_empty_anchor_filter(
            anchors, voxel_grid, density_threshold=1)
        expected_filter = np.array(expected)
        self.assertTrue((gen_filter == expected_filter).all())

    # Unit boxes at four centres, each at 0 and 90 degrees
    unit_boxes_3d = np.array([
        [x, 0, z, 1, 1, 1, ry]
        for x in (0.51, 1.51)
        for z in (0.51, 1.1)
        for ry in (0, quarter_turn)
    ])

    # test anchor locations, number indicates the anchors indices
    # [ ][ ][ ][ ]
    # [ ][1][3][ ]
    # [ ][ ][ ][ ]
    # [ ][5][7][ ]
    check_filter(
        unit_boxes_3d,
        [False, False, True, True, False, False, True, True])

    # Elongated boxes as (x, z, l, w), each at 0 and 90 degrees
    elongated_cases = [
        (0.5, 0.5, 2, 1),  # case 1
        (0.5, 1.5, 1, 2),  # case 2
        (1.5, 0.5, 2, 1),  # case 3
        (1.5, 1.5, 1, 2),  # case 4
    ]
    elongated_boxes_3d = np.array([
        [x, 0, z, length, width, 1, ry]
        for x, z, length, width in elongated_cases
        for ry in (0, quarter_turn)
    ])

    # case 1
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][o][ ][ ]     [ ][o][o][ ]
    # [ ][o][ ][ ]     [ ][ ][ ][ ]
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]

    # case 2
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][ ][o][o]     [ ][ ][o][ ]
    # [ ][ ][ ][ ]     [ ][ ][o][ ]
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]

    # case 3
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][o][ ][ ]     [ ][o][o][ ]
    # [ ][o][ ][ ]     [ ][ ][ ][ ]

    # case 4
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][ ][ ][ ]     [ ][ ][ ][ ]
    # [ ][ ][o][o]     [ ][ ][o][ ]
    # [ ][ ][ ][ ]     [ ][ ][o][ ]
    check_filter(
        elongated_boxes_3d,
        [False, True, True, True, False, True, True, True])
def np_box_3d_to_box_4c(box_3d, ground_plane):
    """Converts a single box_3d to box_4c

    The box is converted to an anchor with ``ortho_rotate=True``; its four
    x/z corners are then rotated by the remaining angle between ``ry`` and
    its nearest multiple of 90 degrees and moved to the anchor centroid.

    Args:
        box_3d: box_3d (7,) in the format [x, y, z, l, w, h, ry]
        ground_plane: ground plane coefficients (4,)

    Returns:
        box_4c (10,) laid out as [x1, x2, x3, x4, z1, z2, z3, z4, h1, h2],
        where h1 is the ground height at the centroid minus the centroid
        y, and h2 is h1 plus the anchor's y dimension.
    """
    format_checker.check_box_3d_format(box_3d)

    anchor = box_3d_encoder.box_3d_to_anchor(box_3d, ortho_rotate=True)[0]
    centroid_x, centroid_y, centroid_z = anchor[0], anchor[1], anchor[2]
    dim_x, dim_y, dim_z = anchor[3], anchor[4], anchor[5]

    # Corners of a temporary box centred on (0, 0), used for the rotation
    half_x = dim_x / 2
    half_z = dim_z / 2
    corner_xs = np.asarray([half_x, half_x, -half_x, -half_x])
    corner_zs = np.array([half_z, -half_z, -half_z, half_z])

    # Residual rotation left after snapping ry to the nearest 90 degrees
    ry = box_3d[6]
    quarter_turn = np.pi / 2
    snapped_ry = np.round(ry / quarter_turn) * quarter_turn
    ry_diff = ry - snapped_ry

    cos_diff = np.cos(ry_diff)
    sin_diff = np.sin(ry_diff)

    # Transformation matrix, including rotation and translation
    tr_mat = np.array([[cos_diff, sin_diff, centroid_x],
                       [-sin_diff, cos_diff, centroid_z],
                       [0, 0, 1]])

    # Add a row of ones so the translation is applied by the product
    homogeneous = np.vstack(
        [corner_xs, corner_zs, np.ones(corner_xs.shape)])

    # Keep the x and z rows, dropping the trailing row of ones
    corners = np.matmul(tr_mat, homogeneous)[0:2]

    # Calculate height off ground plane
    ground_y = geometry_utils.calculate_plane_point(
        ground_plane, [centroid_x, None, centroid_z])[1]
    h1 = ground_y - centroid_y
    h2 = h1 + dim_y

    # Stack into (10,) ndarray
    return np.hstack([corners.flatten(), h1, h2])
def _add_box_outline(axes, box):
    """Adds a blue, unfilled rectangle for a [x1, y1, x2, y2] box to axes."""
    box_w = box[2] - box[0]
    box_h = box[3] - box[1]
    rect = patches.Rectangle((box[0], box[1]),
                             box_w,
                             box_h,
                             linewidth=2,
                             edgecolor='b',
                             facecolor='none')
    axes.add_patch(rect)


def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
    in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # The clusters themselves are not used below (fake clusters are used
    # instead). The call is kept in case get_clusters has side effects --
    # TODO confirm and remove if it does not.
    label_cluster_utils = LabelClusterUtils(dataset)
    label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # Alternative cluster sets:
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        _add_box_outline(bev_axes, box)

    for normalized_box in bev_normalized_boxes:
        _add_box_outline(bev_normalized_axes, normalized_box)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    # Only the shape is needed; close the image file once it is read
    with Image.open(image_path) as image:
        image_shape = np.array(image).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Overlay boxes on images. The 3D boxes are drawn with the calibration
    # of the displayed frame (img_idx), the same one used for the 2D
    # projection above, so both overlays agree.
    for anchor_idx, anchor_box_3d in enumerate(anchor_boxes_3d):
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, stereo_calib_p2)

        # Draw 2D boxes
        _add_box_outline(rgb_2d_axes, rgb_boxes[anchor_idx])

        # Redraw periodically rather than after every anchor
        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
def load_samples(self, indices):
    """ Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required. Otherwise,
        samples should be provided by the next_batch function

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Return:
        samples: a list of data sample dicts. If a training sample has
            empty anchors info (and train_on_all_samples is off), a
            single-element list holding only the sample name and the
            anchors info is returned immediately instead.
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Only read labels if they exist
        if self.has_labels:
            # Read mini batch first to see if it is empty
            anchors_info = self.get_anchors_info(sample_name)

            if (not anchors_info) and self.train_val_test == 'train' \
                    and (not self.train_on_all_samples):
                empty_sample_dict = {
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_ANCHORS_INFO: anchors_info
                }
                return [empty_sample_dict]

            obj_labels = obj_utils.read_labels(self.label_dir,
                                               int(sample_name))

            # Only use objects that match dataset classes
            obj_labels = self.kitti_utils.filter_labels(obj_labels)
        else:
            obj_labels = None
            anchors_info = []

            label_anchors = np.zeros((1, 6))
            label_boxes_3d = np.zeros((1, 7))
            label_classes = np.zeros(1)

        img_idx = int(sample_name)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.planes_dir)

        # Get calibration
        stereo_calib = calib_utils.read_calibration(
            self.calib_dir, int(sample_name))
        stereo_calib_p2 = stereo_calib.p2

        point_cloud = self.kitti_utils.get_point_cloud(
            self.bev_source, img_idx, image_shape)

        # Augmentation (Flipping)
        # WZN: the flipping augmentation flips the image (in camera frame),
        # the point cloud (in Lidar frame) and the calibration matrix
        # (between cam and Lidar), so the correspondence is still true.
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            # obj_labels is None when the dataset has no labels; there is
            # nothing to flip in that case.
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset
                    # calculation should work, since the negative samples
                    # won't be regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(
            point_cloud, ground_plane, output_indices=self.output_indices)

        # WZN: produce input for sparse pooling. With output_indices set,
        # the result is indexed as (bev maps, voxel indices, points in
        # voxel) instead of being the bev maps alone.
        if self.output_indices:
            voxel_indices = bev_images[1]
            pts_in_voxel = bev_images[2]
            bev_images = bev_images[0]

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        # WZN: produce input for sparse pooling
        # NOTE(review): stereo_calib is the unflipped calibration even when
        # AUG_FLIPPING was applied above -- confirm this is intended.
        if self.output_indices:
            sparse_pooling_input1 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input.shape[0:2]),
                stride=[1, 1])

            # WZN: Note here avod padded the vgg input by 4, so add it
            bev_input_padded = np.copy(bev_input.shape[0:2])
            bev_input_padded[0] = bev_input_padded[0] + 4
            sparse_pooling_input2 = produce_sparse_pooling_input(
                gen_sparse_pooling_input_avod(
                    pts_in_voxel, voxel_indices, stereo_calib,
                    [image_shape[1], image_shape[0]],
                    bev_input_padded),
                stride=[8, 8])
            sparse_pooling_input = [
                sparse_pooling_input1, sparse_pooling_input2
            ]
        else:
            sparse_pooling_input = None

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,

            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            # WZN: for sparse pooling
            constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,

            constants.KEY_ANCHORS_INFO: anchors_info,

            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

    return sample_dicts