def main():
    # Paths
    kitti_dir = os.path.expanduser('~/Kitti/object/')
    data_split_dir = 'training'

    image_dir = os.path.join(kitti_dir, data_split_dir) + '/image_2'
    label_dir = os.path.join(kitti_dir, data_split_dir) + '/label_2'
    calib_dir = os.path.join(kitti_dir, data_split_dir) + '/calib'

    sample_name = '000050'

    frame_calib = calib_utils.get_frame_calib(calib_dir, sample_name)
    cam_p = frame_calib.p2

    f, axes = vis_utils.plots_from_sample_name(image_dir, sample_name, 2, 1)

    # Load labels
    obj_labels = obj_utils.read_labels(label_dir, sample_name)

    for obj in obj_labels:
        # Draw 2D and 3D boxes
        vis_utils.draw_obj_as_box_2d(axes[0], obj)
        vis_utils.draw_obj_as_box_3d(axes[1], obj, cam_p)

    plt.show(block=True)
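
# The demo above relies on vis_utils.draw_obj_as_box_3d to project a 3D label
# into the image using cam_p. A minimal sketch of that projection is shown
# below, assuming the usual KITTI label convention (location t at the
# bottom-face center, yaw rotation_y about the camera y-axis, p2 as the 3x4
# camera matrix). The helper name `project_box_3d_corners` is hypothetical and
# only illustrates the math; it is not part of the repo API.
def project_box_3d_corners(obj, cam_p):
    """Projects the 8 corners of a KITTI 3D box into the image plane."""
    l, w, h = obj.l, obj.w, obj.h
    x, y, z = obj.t  # bottom-face center in the camera frame

    # Box corners in the object frame (y points down, origin at bottom face)
    x_corners = np.array([l, l, -l, -l, l, l, -l, -l]) / 2.0
    y_corners = np.array([0, 0, 0, 0, -h, -h, -h, -h])
    z_corners = np.array([w, -w, -w, w, w, -w, -w, w]) / 2.0

    # Rotate by rotation_y and translate into the camera frame
    ry = obj.ry
    rot = np.array([[np.cos(ry), 0, np.sin(ry)],
                    [0, 1, 0],
                    [-np.sin(ry), 0, np.cos(ry)]])
    corners = rot @ np.vstack([x_corners, y_corners, z_corners])
    corners += np.array([[x], [y], [z]])

    # Project with the 3x4 camera matrix and normalize by depth
    corners_homo = np.vstack([corners, np.ones((1, 8))])
    pts_2d = cam_p @ corners_homo
    return pts_2d[0:2] / pts_2d[2]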
def test_filter_obj_labels(self):
    sample_name = '000050'
    obj_labels = obj_utils.read_labels(self.dataset.kitti_label_dir, sample_name)

    obj_labels_filt, obj_mask = obj_utils.filter_labels(
        obj_labels, classes=['Car'], depth_range=[5, 45])

    self.assertTrue(len(obj_labels_filt) == 3)
    np.testing.assert_equal(obj_mask, [True, True, False, True, False])
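
# For reference, a minimal sketch of the filtering the test above exercises,
# assuming obj_utils.filter_labels keeps labels whose type is in `classes` and
# whose centroid depth (t[2]) lies inside `depth_range`, and also returns a
# boolean mask over the original list. This is an illustrative stand-in, not
# the actual obj_utils implementation.
def filter_labels_sketch(obj_labels, classes, depth_range):
    mask = np.array([(obj.type in classes) and
                     (depth_range[0] <= obj.t[2] <= depth_range[1])
                     for obj in obj_labels])
    filtered = [obj for obj, keep in zip(obj_labels, mask) if keep]
    return filtered, mask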
def main():
    ##############################
    # Options
    ##############################
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    num_jittered_boxes = 5
    iou_thresh = 0.7

    classes = ['Car']
    sample_name = '000050'
    ##############################

    # Get filtered labels
    dataset.classes = classes
    obj_labels = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
    obj_labels, class_filter = obj_utils.filter_labels(
        obj_labels, classes=dataset.classes)

    # Image shape
    bgr_image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    rgb_image = bgr_image[..., ::-1]
    image_shape = rgb_image.shape[0:2]

    # Generate jittered boxes
    aug_labels = []
    for label in obj_labels:
        for i in range(num_jittered_boxes):
            aug_label = kitti_aug.jitter_obj_boxes_2d(
                [label], iou_thresh, image_shape)
            aug_labels.append(aug_label[0])

    # Visualize boxes
    fig, axes = vis_utils.plots_from_image(rgb_image, display=False)

    # Draw non-augmented boxes in red
    for obj in obj_labels:
        vis_utils.draw_obj_as_box_2d(axes, obj, color='r')

    # Draw augmented boxes in cyan
    for obj in aug_labels:
        vis_utils.draw_obj_as_box_2d(axes, obj, color='c', linewidth=1)

    plt.show(block=True)
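
# kitti_aug.jitter_obj_boxes_2d is used above but not shown. A minimal sketch
# of 2D box jittering is given below, assuming boxes are stored as
# [y1, x1, y2, x2], that corners are perturbed by a fraction of the box size,
# and that samples are redrawn until the IoU with the original box stays above
# iou_thresh. Both helper names and the resampling loop are illustrative only.
def jitter_box_2d_sketch(box_2d, iou_thresh, image_shape, max_trials=100):
    y1, x1, y2, x2 = box_2d
    h, w = y2 - y1, x2 - x1
    for _ in range(max_trials):
        noise = np.random.uniform(-0.1, 0.1, 4) * np.array([h, w, h, w])
        new_box = np.array([y1, x1, y2, x2]) + noise
        # Clip to image bounds (image_shape is [height, width])
        new_box = np.clip(new_box, 0, [image_shape[0], image_shape[1],
                                       image_shape[0], image_shape[1]])
        if iou_2d_sketch(box_2d, new_box) >= iou_thresh:
            return new_box
    return np.asarray(box_2d, dtype=np.float32)


def iou_2d_sketch(box_a, box_b):
    """IoU of two [y1, x1, y2, x2] boxes."""
    y1 = max(box_a[0], box_b[0])
    x1 = max(box_a[1], box_b[1])
    y2 = min(box_a[2], box_b[2])
    x2 = min(box_a[3], box_b[3])
    inter = max(0.0, y2 - y1) * max(0.0, x2 - x1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)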
def get_gts_based_on_difficulty(dataset, sample_name):
    """Returns lists of ground truth objects based on difficulty.

    Args:
        dataset: dataset object providing kitti_label_dir and classes
        sample_name: sample name, e.g. '000050'

    Returns:
        easy_gt_objs, medium_gt_objs, hard_gt_objs: ground truth objects
            filtered to each difficulty
        all_gt_objs: all ground truth objects for the sample
    """
    # Get all ground truth labels and filter to dataset classes
    all_gt_objs = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
    gt_objs, _ = obj_utils.filter_labels_by_class(all_gt_objs, dataset.classes)

    # Filter objects to desired difficulty
    easy_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.EASY)
    medium_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.MODERATE)
    hard_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.HARD)

    # Tag the object type so each difficulty can be distinguished when drawn
    for gt_obj in easy_gt_objs:
        gt_obj.type = 'Easy GT'
    for gt_obj in medium_gt_objs:
        gt_obj.type = 'Medium GT'
    for gt_obj in hard_gt_objs:
        gt_obj.type = 'Hard GT'

    return easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs
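
# filter_labels_by_difficulty is used above but not shown. The KITTI benchmark
# defines difficulty by minimum 2D box height, maximum occlusion level, and
# maximum truncation; a minimal sketch using those official thresholds is
# given below. Whether the repo's Difficulty enum and label attribute names
# (y1, y2, occlusion, truncation) match exactly is an assumption.
KITTI_DIFFICULTY_CRITERIA = {
    # difficulty: (min box height [px], max occlusion level, max truncation)
    'EASY': (40, 0, 0.15),
    'MODERATE': (25, 1, 0.30),
    'HARD': (25, 2, 0.50),
}


def filter_labels_by_difficulty_sketch(obj_labels, difficulty):
    min_height, max_occlusion, max_truncation = \
        KITTI_DIFFICULTY_CRITERIA[difficulty]
    mask = np.array([
        (obj.y2 - obj.y1) >= min_height and
        obj.occlusion <= max_occlusion and
        obj.truncation <= max_truncation
        for obj in obj_labels])
    filtered = [obj for obj, keep in zip(obj_labels, mask) if keep]
    return filtered, mask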
def main():
    ##############################
    # Options
    ##############################
    point_cloud_source = 'depth_2_multiscale'
    samples_to_use = None  # all samples

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    out_instance_dir = 'outputs/instance_2_{}'.format(point_cloud_source)

    required_classes = [
        'Car',
        'Pedestrian',
        'Cyclist',
        'Van',
        'Truck',
        'Person_sitting',
        'Tram',
        'Misc',
    ]
    ##############################
    # End of Options
    ##############################

    # Create instance folder
    os.makedirs(out_instance_dir, exist_ok=True)

    # Get frame ids to process
    if samples_to_use is None:
        samples_to_use = dataset.get_sample_names()

    # Begin instance mask generation
    for sample_idx, sample_name in enumerate(samples_to_use):

        sys.stdout.write(
            '\r{} / {} Generating {} instances for sample {}'.format(
                sample_idx, dataset.num_samples - 1,
                point_cloud_source, sample_name))

        # Get image
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, sample_name)

        # Get point cloud
        if point_cloud_source.startswith('depth'):
            point_cloud = obj_utils.get_depth_map_point_cloud(
                sample_name, frame_calib, dataset.depth_dir)
        elif point_cloud_source == 'velo':
            point_cloud = obj_utils.get_lidar_point_cloud_for_cam(
                sample_name, frame_calib, dataset.velo_dir, image_shape)
        else:
            raise ValueError('Invalid point cloud source', point_cloud_source)

        # Filter labels according to classes
        obj_labels = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
        obj_labels, _ = obj_utils.filter_labels_by_class(
            obj_labels, required_classes)

        # Get 2D and 3D bounding boxes from labels
        gt_boxes_2d = [box_3d_encoder.object_label_to_box_2d(obj_label)
                       for obj_label in obj_labels]
        gt_boxes_3d = [box_3d_encoder.object_label_to_box_3d(obj_label)
                       for obj_label in obj_labels]

        # Background pixels are 255, instance pixels store the instance index
        instance_image = np.full(image_shape, 255, dtype=np.uint8)

        # Start instance index at 0 and generate instance masks for all boxes
        inst_idx = 0
        for obj_label, box_2d, box_3d in zip(obj_labels, gt_boxes_2d, gt_boxes_3d):

            # Apply inflation and offset to box_3d
            modified_box_3d = modify_box_3d(box_3d, obj_label)

            # Get points in 3D box
            box_points, mask = obj_utils.points_in_box_3d(
                modified_box_3d, point_cloud.T)

            # Get points in 2D box
            points_in_im = calib_utils.project_pc_to_image(
                box_points.T, cam_p=frame_calib.p2)
            mask_2d = \
                (points_in_im[0] >= box_2d[1]) & \
                (points_in_im[0] <= box_2d[3]) & \
                (points_in_im[1] >= box_2d[0]) & \
                (points_in_im[1] <= box_2d[2])

            if point_cloud_source.startswith('depth'):
                mask_points_in_im = np.where(mask.reshape(image_shape))
                mask_points_in_im = [mask_points_in_im[0][mask_2d],
                                     mask_points_in_im[1][mask_2d]]
                instance_pixels = np.asarray(
                    [mask_points_in_im[1], mask_points_in_im[0]])
            elif point_cloud_source == 'velo':
                # Velodyne source is not implemented for instance masks, so
                # instance_pixels would be undefined past this point
                # image_points = box_utils.project_to_image(
                #     box_points.T, frame.p_left).astype(np.int32)
                raise NotImplementedError(
                    'Instance mask generation from velodyne points '
                    'is not implemented')

            # Guarantees that indices don't exceed image dimensions
            instance_pixels[0, :] = np.clip(
                instance_pixels[0, :], 0, image_shape[1] - 1)
            instance_pixels[1, :] = np.clip(
                instance_pixels[1, :], 0, image_shape[0] - 1)

            instance_image[instance_pixels[1, :],
                           instance_pixels[0, :]] = np.uint8(inst_idx)

            inst_idx += 1

        # Write image to directory
        cv2.imwrite(out_instance_dir + '/{}.png'.format(sample_name),
                    instance_image,
                    [cv2.IMWRITE_PNG_COMPRESSION, 1])
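
# modify_box_3d is called above but not shown. A minimal sketch is given
# below, assuming a box_3d laid out as [x, y, z, l, w, h, ry] and that the
# modification simply inflates the box dimensions and nudges the centroid so
# slightly misaligned depth points still fall inside the box. The inflation
# and offset values here are placeholders, not the repo's actual values.
def modify_box_3d_sketch(box_3d, obj_label, inflation=1.1, z_offset=0.0):
    modified = np.copy(box_3d).astype(np.float32)
    modified[3:6] *= inflation   # inflate l, w, h
    modified[2] += z_offset      # optional shift along the camera z-axis
    return modified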
def get_sample_dict(self, indices):
    """Loads input-output data for a set of samples. Should only be called
    when a particular sample dict is required. Otherwise, samples should be
    provided by the next_batch function.

    Args:
        indices: A list of sample indices from the dataset.sample_list
            to be loaded

    Returns:
        samples: a list of data sample dicts
    """
    sample_dicts = []
    for sample_idx in indices:
        sample = self.sample_list[sample_idx]
        sample_name = sample.name

        # Load image (BGR -> RGB)
        bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
        rgb_image = bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(self.calib_dir, sample_name)
        cam_p = frame_calib.p2

        # Only read labels if they exist
        if self.train_val_test in ['train', 'val']:

            # Read KITTI object labels
            kitti_obj_labels = obj_utils.read_labels(
                self.kitti_label_dir, sample_name)

            if self.use_mscnn_detections and self.train_val_test == 'val':
                # Read mscnn obj labels and replace the KITTI obj label
                # box coords and scores
                mscnn_obj_labels = obj_utils.read_labels(
                    self.mscnn_label_dir, sample_name)
                obj_labels = obj_utils.merge_kitti_and_mscnn_obj_labels(
                    kitti_obj_labels, mscnn_obj_labels,
                    min_iou=self.mscnn_merge_min_iou,
                    default_score_type='distance')
            else:
                obj_labels = kitti_obj_labels

            num_all_objs = len(obj_labels)

            # Filter labels
            obj_labels, obj_mask = obj_utils.apply_obj_filter(
                obj_labels, self.obj_filter)
            num_objs = len(obj_labels)
            if num_objs < 1:
                sample_dicts.append(None)
                continue

            if self.use_mscnn_detections:
                # Get filtered original kitti_obj_labels
                kitti_obj_labels, kitti_obj_mask = obj_utils.apply_obj_filter(
                    kitti_obj_labels, self.obj_filter)
                num_kitti_objs = len(kitti_obj_labels)
                if num_kitti_objs < 1:
                    sample_dicts.append(None)
                    continue

            # Load instance masks
            instance_image = instance_utils.get_instance_image(
                sample_name, self.instance_dir)
            instance_masks = instance_utils.get_instance_mask_list(
                instance_image, num_all_objs)
            instance_masks = instance_masks[obj_mask]

            if self.oversample:
                # Oversample to required number of boxes
                num_to_oversample = self.num_boxes - num_objs
                oversample_indices = np.random.choice(
                    num_objs, num_to_oversample, replace=True)
                oversample_indices = np.hstack(
                    [np.arange(0, num_objs), oversample_indices])
                obj_labels = obj_labels[oversample_indices]
                instance_masks = instance_masks[oversample_indices]

            # Augmentation if in train mode
            if self.train_val_test == 'train':

                # Image augmentation
                use_image_aug = self.aug_config.use_image_aug
                if use_image_aug:
                    image_input = kitti_aug.apply_image_noise(rgb_image)

                # Box jittering
                box_jitter_type = self.aug_config.box_jitter_type
                if box_jitter_type is None:
                    pass

                elif box_jitter_type == 'oversample':
                    # Replace oversampled boxes with jittered boxes
                    if not self.oversample:
                        raise ValueError(
                            'Must oversample object labels to use {} '
                            'box jitter type'.format(box_jitter_type))
                    aug_labels = kitti_aug.jitter_obj_boxes_2d(
                        obj_labels[num_objs:], 0.7, image_shape)
                    obj_labels[num_objs:] = aug_labels

                elif box_jitter_type == 'oversample_gt':
                    # Replace oversampled boxes with jittered gt boxes
                    if not self.oversample:
                        raise ValueError(
                            'Must oversample object labels to use {} '
                            'box jitter type'.format(box_jitter_type))

                    # Get enough gt boxes to jitter
                    gt_num_to_oversample = self.num_boxes - num_objs
                    gt_oversample_indices = np.random.choice(
                        num_kitti_objs, gt_num_to_oversample, replace=True)
                    kitti_obj_labels = kitti_obj_labels[gt_oversample_indices]
                    aug_labels = kitti_aug.jitter_obj_boxes_2d(
                        kitti_obj_labels, 0.7, image_shape)
                    obj_labels[num_objs:] = aug_labels

                elif box_jitter_type == 'all':
                    # Apply data augmentation on all labels
                    obj_labels = kitti_aug.jitter_obj_boxes_2d(
                        obj_labels, 0.7, image_shape)

                else:
                    raise ValueError('Invalid box_jitter_type', box_jitter_type)

            # TODO: Do this some other way
            # Get 2D and 3D boxes
            label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)
            label_boxes_3d = obj_utils.boxes_3d_from_obj_labels(obj_labels)

            label_alphas = np.asarray(
                [obj_label.alpha for obj_label in obj_labels], dtype=np.float32)

            label_alpha_bins, label_alpha_regs, label_valid_alpha_bins = \
                zip(*[orientation_encoder.np_orientation_to_angle_bin(
                    obj_label.alpha, self.num_alpha_bins, self.alpha_bin_overlap)
                    for obj_label in obj_labels])

            # Get viewing angles
            label_viewing_angles_2d = np.asarray(
                [obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                 for box_2d in label_boxes_2d], dtype=np.float32)
            label_viewing_angles_3d = np.asarray(
                [obj_utils.get_viewing_angle_box_3d(box_3d, cam_p)
                 for box_3d in label_boxes_3d], dtype=np.float32)

            # Parse class indices
            label_class_indices = [
                obj_utils.class_str_to_index(obj_label.type, self.classes)
                for obj_label in obj_labels]
            label_class_indices = np.expand_dims(
                np.asarray(label_class_indices, dtype=np.int32), axis=1)
            label_class_strs = [obj_label.type for obj_label in obj_labels]

            # Get proposal z centroid offset
            prop_cen_z_offset_list = np.asarray(
                [instance_utils.get_prop_cen_z_offset(class_str)
                 for class_str in label_class_strs])

            # Get xyz map in cam_N frame
            depth_map = obj_utils.get_depth_map(sample_name, self.depth_dir)

            # Get scores
            label_scores = np.asarray(
                [obj_label.score for obj_label in obj_labels], np.float32)

            # Get lwh average
            lwh_means = np.asarray(
                [obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                 for class_str in label_class_strs])

        elif self.train_val_test == 'test':

            # Read object test labels
            obj_labels = obj_utils.read_labels(self.mscnn_label_dir, sample_name)
            num_objs = len(obj_labels)
            if num_objs < 1:
                sample_dicts.append(None)
                continue

            # Just filter classes
            obj_labels, obj_mask = obj_utils.apply_obj_filter(
                obj_labels, self.obj_filter)
            num_objs = len(obj_labels)
            if num_objs < 1:
                sample_dicts.append(None)
                continue

            # Oversample to required number of boxes
            num_to_oversample = self.num_boxes - num_objs
            oversample_indices = np.random.choice(
                num_objs, num_to_oversample, replace=True)
            oversample_indices = np.hstack(
                [np.arange(0, num_objs), oversample_indices])
            obj_labels = obj_labels[oversample_indices]

            # Get 2D boxes
            label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)

            # Get scores
            label_scores = np.asarray(
                [obj_label.score for obj_label in obj_labels], np.float32)

            # Calculate viewing angles
            label_viewing_angles_2d = np.asarray(
                [obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                 for box_2d in label_boxes_2d], dtype=np.float32)

            # Parse class indices
            label_class_indices = [
                obj_utils.class_str_to_index(obj_label.type, self.classes)
                for obj_label in obj_labels]
            label_class_indices = np.expand_dims(
                np.asarray(label_class_indices, dtype=np.int32), axis=1)
            label_class_strs = [obj_label.type for obj_label in obj_labels]

            # Get lwh average
            lwh_means = np.asarray(
                [obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                 for class_str in label_class_strs])

            # Get proposal z centroid offset
            prop_cen_z_offset_list = np.asarray(
                [instance_utils.get_prop_cen_z_offset(class_str)
                 for class_str in label_class_strs])

        else:
            raise ValueError('Invalid run mode', self.train_val_test)

        # Common inputs for all train_val_test modes
        # Normalize 2D boxes
        label_boxes_2d_norm = label_boxes_2d / np.tile(image_shape, 2)

        sample_dict = {
            constants.SAMPLE_NUM_OBJS: num_objs,

            constants.SAMPLE_IMAGE_INPUT: image_input,
            constants.SAMPLE_CAM_P: cam_p,
            constants.SAMPLE_NAME: sample_name,

            constants.SAMPLE_LABEL_BOXES_2D_NORM: label_boxes_2d_norm,
            constants.SAMPLE_LABEL_BOXES_2D: label_boxes_2d,
            constants.SAMPLE_LABEL_SCORES: label_scores,
            constants.SAMPLE_LABEL_CLASS_STRS: np.expand_dims(label_class_strs, 1),
            constants.SAMPLE_LABEL_CLASS_INDICES: label_class_indices,

            constants.SAMPLE_MEAN_LWH: lwh_means,
            constants.SAMPLE_PROP_CEN_Z_OFFSET: prop_cen_z_offset_list,

            constants.SAMPLE_VIEWING_ANGLES_2D: label_viewing_angles_2d,
        }

        if self.train_val_test in ['train', 'val']:
            sample_dict.update({
                constants.SAMPLE_LABEL_BOXES_3D: label_boxes_3d,

                constants.SAMPLE_ALPHAS: label_alphas,
                constants.SAMPLE_ALPHA_BINS: np.asarray(label_alpha_bins),
                constants.SAMPLE_ALPHA_REGS: np.asarray(label_alpha_regs),
                constants.SAMPLE_ALPHA_VALID_BINS: np.asarray(label_valid_alpha_bins),

                constants.SAMPLE_VIEWING_ANGLES_3D: label_viewing_angles_3d,

                constants.SAMPLE_INSTANCE_MASKS: instance_masks,
                constants.SAMPLE_DEPTH_MAP: depth_map,
            })
        elif self.train_val_test == 'test':
            # No additional labels for test mode
            pass

        sample_dicts.append(sample_dict)

    return sample_dicts
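
# orientation_encoder.np_orientation_to_angle_bin is used above to turn each
# alpha into per-bin classification and regression targets. A minimal sketch
# of that multibin-style encoding is given below, assuming num_bins equally
# spaced bin centres over [-pi, pi), a 0/1 "valid bin" vector for bins whose
# half-width plus overlap covers the angle, and a residual to each bin centre.
# The exact bin layout, overlap handling, and return ordering in the repo may
# differ; this is only an illustration of the idea.
def orientation_to_angle_bin_sketch(angle, num_bins, bin_overlap):
    bin_size = 2.0 * np.pi / num_bins
    bin_centres = -np.pi + bin_size * (np.arange(num_bins) + 0.5)

    # Smallest signed difference between the angle and each bin centre
    residuals = (angle - bin_centres + np.pi) % (2.0 * np.pi) - np.pi

    # Bins whose extended range covers the angle
    valid_bins = (np.abs(residuals) <=
                  (bin_size / 2.0 + bin_overlap)).astype(np.float32)

    # One-hot vector for the closest bin (classification target)
    bins_one_hot = np.zeros(num_bins, dtype=np.float32)
    bins_one_hot[np.argmin(np.abs(residuals))] = 1.0

    return bins_one_hot, residuals.astype(np.float32), valid_bins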
def get_clusters(self):
    """Calculates clusters for each class

    Returns:
        all_clusters: list of clusters for each class
        all_std_devs: list of cluster standard deviations for each class
    """
    classes = self._dataset.classes
    num_clusters = self._dataset.num_clusters

    all_clusters = [[] for _ in range(len(classes))]
    all_std_devs = [[] for _ in range(len(classes))]

    classes_not_loaded = []

    # Try to read from file first
    for class_idx in range(len(classes)):
        clusters, std_devs = self._read_clusters_from_file(
            self._dataset, classes[class_idx], num_clusters[class_idx])

        if clusters is not None:
            all_clusters[class_idx].extend(np.asarray(clusters))
            all_std_devs[class_idx].extend(np.asarray(std_devs))
        else:
            classes_not_loaded.append(class_idx)

    # Return the data flattened into N x 3 arrays
    if len(classes_not_loaded) == 0:
        return all_clusters, all_std_devs

    # Calculate the remaining clusters
    # Load labels corresponding to the sample list for clustering
    sample_list = self._dataset.load_sample_names(self.cluster_split)

    all_labels = [[] for _ in range(len(classes))]
    num_samples = len(sample_list)
    for sample_idx in range(num_samples):
        sys.stdout.write("\rClustering labels {} / {}".format(
            sample_idx + 1, num_samples))
        sys.stdout.flush()

        sample_name = sample_list[sample_idx]
        obj_labels = obj_utils.read_labels(self._dataset.kitti_label_dir,
                                           sample_name)

        filtered_labels = LabelClusterUtils._filter_labels_by_class(
            obj_labels, self._dataset.classes)

        for class_idx in range(len(classes)):
            all_labels[class_idx].extend(filtered_labels[class_idx])

    print("\nFinished reading labels, clustering data...\n")

    # Cluster
    for class_idx in classes_not_loaded:
        labels_for_class = np.array(all_labels[class_idx])

        n_clusters_for_class = num_clusters[class_idx]
        if len(labels_for_class) < n_clusters_for_class:
            raise ValueError(
                "Number of samples is less than number of clusters "
                "{} < {}".format(len(labels_for_class), n_clusters_for_class))

        k_means = KMeans(n_clusters=n_clusters_for_class,
                         random_state=0).fit(labels_for_class)

        clusters_for_class = []
        std_devs_for_class = []

        for cluster_idx in range(len(k_means.cluster_centers_)):
            cluster_centre = k_means.cluster_centers_[cluster_idx]
            labels_in_cluster = labels_for_class[k_means.labels_ == cluster_idx]

            # Calculate std. dev
            std_dev = np.std(labels_in_cluster, axis=0)

            formatted_cluster = [float('%.3f' % value)
                                 for value in cluster_centre]
            formatted_std_dev = [float('%.3f' % value)
                                 for value in std_dev]

            clusters_for_class.append(formatted_cluster)
            std_devs_for_class.append(formatted_std_dev)

        # Write to files
        file_path = self._get_cluster_file_path(self._dataset,
                                                classes[class_idx],
                                                num_clusters[class_idx])
        self._write_clusters_to_file(file_path, clusters_for_class,
                                     std_devs_for_class)

        # Add to full list
        all_clusters[class_idx].extend(np.asarray(clusters_for_class))
        all_std_devs[class_idx].extend(np.asarray(std_devs_for_class))

    # Return the data flattened into N x 3 arrays
    return all_clusters, all_std_devs
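
# Example usage (illustrative): the clusters appear to be computed per class
# over N x 3 label dimensions (presumably l, w, h), so with num_clusters = [2]
# for classes = ['Car'] the call below would return 2 cluster centres and
# their standard deviations for the Car class. The LabelClusterUtils
# constructor signature is an assumption for this sketch.
#
#     label_cluster_utils = LabelClusterUtils(dataset)
#     all_clusters, all_std_devs = label_cluster_utils.get_clusters()
#     car_clusters = np.asarray(all_clusters[0])   # shape (num_clusters[0], 3)
#     car_std_devs = np.asarray(all_std_devs[0])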