def testLoadBoundingBoxesDistance(self):
  # Test if all of the groundtruth data loads correctly for each label
  # when distance is specified.
  metadata = kitti_metadata.KITTIMetadata()
  num_classes = len(metadata.ClassNames())
  test_data = self._GenerateMetricsWithTestData(num_classes)
  num_bins_of_distance = int(
      np.rint(metadata.MaximumDistance() / metadata.DistanceBinWidth()))
  distance_metric = test_data.metrics._breakdown_metrics['distance']
  # Test if all of the groundtruth data loads correctly for each label
  # when no distance is specified.
  self.assertAllEqual(test_data.expected_objects_at_distance,
                      np.transpose(distance_metric._histogram))
  # Note that we always skip 'Background' class 0.
  for label in range(1, num_classes):
    for distance in range(num_bins_of_distance):
      data = test_data.metrics._LoadBoundingBoxes(
          'groundtruth', label, distance=distance)
      if test_data.expected_objects_at_distance[label, distance] == 0:
        self.assertIsNone(data)
      else:
        self.assertEqual(
            test_data.expected_objects_at_distance[label, distance],
            len(data.boxes))
        self.assertEqual(
            test_data.expected_objects_at_distance[label, distance],
            len(data.imgids))
        self.assertEqual(
            test_data.expected_objects_at_distance[label, distance],
            len(data.scores))
        self.assertEqual(
            test_data.expected_objects_at_distance[label, distance],
            len(data.difficulties))
        self.assertAllEqual(
            np.ones(shape=[
                test_data.expected_objects_at_distance[label, distance]
            ]), data.scores)
        self.assertAllEqual(
            np.zeros(shape=[
                test_data.expected_objects_at_distance[label, distance]
            ]), data.imgids)
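# The 'distance' breakdown above buckets ground truth boxes into fixed-width
# bins by their distance from the sensor origin. A minimal sketch of that
# bucketing, assuming Euclidean distance of the box center and a last-bin
# clamp; `_sketch_distance_bin` is a hypothetical helper, not part of the
# metric code.
def _sketch_distance_bin(box_center_xyz, bin_width, num_bins):
  """Maps a box center to a distance-histogram bin (illustrative only)."""
  distance = np.linalg.norm(box_center_xyz)
  # Boxes at or beyond the maximum distance land in the last bin.
  return min(int(distance // bin_width), num_bins - 1)


# Example: a center at (3, 4, 0) is 5m away; with 5m bins it lands in bin 1.
# _sketch_distance_bin([3.0, 4.0, 0.0], bin_width=5.0, num_bins=20)  # -> 1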
def testLoadBoundingBoxesRotation(self):
  # Test if all of the groundtruth data loads correctly for each label
  # when rotation is specified.
  metadata = kitti_metadata.KITTIMetadata()
  num_classes = len(metadata.ClassNames())
  test_data = self._GenerateMetricsWithTestData(num_classes)
  num_bins_of_rotation = metadata.NumberOfRotationBins()
  rotation_metric = test_data.metrics._breakdown_metrics['rotation']
  # Test if all of the groundtruth data loads correctly for each label
  # when no rotation is specified.
  self.assertAllEqual(test_data.expected_objects_at_rotation,
                      np.transpose(rotation_metric._histogram))
  # Note that we always skip 'Background' class 0.
  for label in range(1, num_classes):
    for rotation in range(num_bins_of_rotation):
      data = test_data.metrics._LoadBoundingBoxes(
          'groundtruth', label, rotation=rotation)
      if test_data.expected_objects_at_rotation[label, rotation] == 0:
        self.assertIsNone(data)
      else:
        self.assertEqual(
            test_data.expected_objects_at_rotation[label, rotation],
            len(data.boxes))
        self.assertEqual(
            test_data.expected_objects_at_rotation[label, rotation],
            len(data.imgids))
        self.assertEqual(
            test_data.expected_objects_at_rotation[label, rotation],
            len(data.scores))
        self.assertEqual(
            test_data.expected_objects_at_rotation[label, rotation],
            len(data.difficulties))
        self.assertAllEqual(
            np.ones(shape=[
                test_data.expected_objects_at_rotation[label, rotation]
            ]), data.scores)
        self.assertAllEqual(
            np.zeros(shape=[
                test_data.expected_objects_at_rotation[label, rotation]
            ]), data.imgids)
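# Mirroring the distance sketch above: a rotation histogram bin could be
# derived by folding the heading angle into [0, MaximumRotation) and dividing
# by the bin width. `_sketch_rotation_bin` is hypothetical; the real binning
# lives in the 'rotation' breakdown metric.
def _sketch_rotation_bin(phi, max_rotation, num_bins):
  """Maps a heading angle to a rotation-histogram bin (illustrative only)."""
  bin_width = max_rotation / float(num_bins)
  wrapped = np.mod(phi, max_rotation)  # Folds negative angles up into range.
  return min(int(wrapped // bin_width), num_bins - 1)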
def testLoadBoundingBoxesNumPoints(self):
  # Test if all of the groundtruth data loads correctly for each label
  # when the number of points is specified.
  metadata = kitti_metadata.KITTIMetadata()
  num_classes = len(metadata.ClassNames())
  test_data = self._GenerateMetricsWithTestData(num_classes)
  num_bins_of_points = metadata.NumberOfPointsBins()
  num_points_metric = test_data.metrics._breakdown_metrics['num_points']
  self.assertAllEqual(test_data.expected_objects_at_points,
                      np.transpose(num_points_metric._histogram))
  # Note that we always skip 'Background' class 0.
  for label in range(1, num_classes):
    for num_points in range(num_bins_of_points):
      data = test_data.metrics._LoadBoundingBoxes(
          'groundtruth', label, num_points=num_points)
      if test_data.expected_objects_at_points[label, num_points] == 0:
        self.assertIsNone(data)
      else:
        # Skip the first bin because it is a special case.
        if num_points == 0:
          continue
        self.assertEqual(
            test_data.expected_objects_at_points[label, num_points],
            len(data.boxes))
        self.assertEqual(
            test_data.expected_objects_at_points[label, num_points],
            len(data.imgids))
        self.assertEqual(
            test_data.expected_objects_at_points[label, num_points],
            len(data.scores))
        self.assertEqual(
            test_data.expected_objects_at_points[label, num_points],
            len(data.difficulties))
        self.assertAllEqual(
            np.ones(shape=[
                test_data.expected_objects_at_points[label, num_points]
            ]), data.scores)
        self.assertAllEqual(
            np.zeros(shape=[
                test_data.expected_objects_at_points[label, num_points]
            ]), data.imgids)
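# The 'num_points' breakdown uses log-spaced bin edges, which the test mirrors
# with np.logspace. A sketch of mapping a per-box point count onto those
# edges; `_sketch_points_bin` is a hypothetical helper, and the first-bin
# special case noted in the test is not modeled here.
def _sketch_points_bin(num_points_in_box, max_points, num_bins):
  """Maps a point count to a log-spaced bin index (illustrative only)."""
  edges = np.logspace(np.log10(1.0), np.log10(max_points), num_bins + 1)
  # np.digitize returns 1 for values in [edges[0], edges[1]), hence the -1.
  bin_index = np.digitize(num_points_in_box, edges) - 1
  return int(np.clip(bin_index, 0, num_bins - 1))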
def testCalibrationCalculator(self):
  # End-to-end test for the calibration calculator.
  metadata = kitti_metadata.KITTIMetadata()
  calculator = calibration_processing.CalibrationCalculator(metadata)
  scores_and_hits = np.array([[0.3, 1], [0.5, 1], [0.7, 1]])
  metrics = {}
  metrics['calibrations'] = [{'calibrations': scores_and_hits}]
  calculator.Calculate(metrics)
  summaries = calculator.Summary('Test')
  self.assertEqual(len(summaries), 2)
  ece_summary = summaries[1]
  self.assertEqual(0.5, ece_summary.value[0].simple_value)
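# Why 0.5 is the expected ECE: all three detections are hits (accuracy 1.0)
# while their confidences are 0.3, 0.5 and 0.7, so the per-sample
# |accuracy - confidence| gaps of 0.7, 0.5 and 0.3 average to 0.5. A sketch of
# a binned expected-calibration-error computation, assuming equal-width
# confidence bins; the real CalibrationCalculator may bin differently.
def _sketch_ece(scores, hits, num_bins=10):
  """Expected calibration error over equal-width bins (illustrative only)."""
  edges = np.linspace(0.0, 1.0, num_bins + 1)
  ece = 0.0
  for lo, hi in zip(edges[:-1], edges[1:]):
    in_bin = (scores >= lo) & (scores < hi)
    if in_bin.any():
      # Weight each bin's |accuracy - confidence| gap by its sample fraction.
      ece += in_bin.mean() * abs(hits[in_bin].mean() - scores[in_bin].mean())
  return ece


# _sketch_ece(np.array([0.3, 0.5, 0.7]), np.array([1.0, 1.0, 1.0]))  # -> 0.5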
@classmethod
def Params(cls):
  p = super().Params()
  p.Define(
      'filter_predictions_outside_frustum', False,
      'If true, predictions whose bounding box center is outside of the '
      'image frustum are dropped.')
  p.Define(
      'truncation_threshold', 0.0,
      'Specifies how much of a bounding box can be truncated '
      'by the edge of the image frustum and still be kept. A value of 0.0 '
      'means that we only drop predictions whose 2D bounding box '
      'falls entirely outside the image frustum. A value of 1.0 means '
      'we drop predictions where *any* portion of the bounding box falls '
      'outside the frustum.')
  p.ap_metric = kitti_ap_metric.KITTIAPMetrics.Params(
      kitti_metadata.KITTIMetadata())
  return p
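# To make the truncation_threshold semantics concrete, here is a sketch of the
# keep/drop decision for a single 2D box against the image bounds. The helper
# and its [xmin, ymin, xmax, ymax] box format are hypothetical, and the
# behavior between the two documented endpoints (0.0 and 1.0) is an assumed
# linear interpolation on the truncated area fraction.
def _sketch_keep_box(bbox_2d, image_width, image_height, truncation_threshold):
  """Returns True if a (non-degenerate) box survives the truncation filter."""
  xmin, ymin, xmax, ymax = bbox_2d
  clipped_w = max(0.0, min(xmax, image_width) - max(xmin, 0.0))
  clipped_h = max(0.0, min(ymax, image_height) - max(ymin, 0.0))
  visible = (clipped_w * clipped_h) / ((xmax - xmin) * (ymax - ymin))
  truncated = 1.0 - visible
  # threshold=0.0 drops only boxes entirely outside the frustum (truncated=1);
  # threshold=1.0 drops boxes with *any* truncation (truncated > 0).
  return truncated < 1.0 and truncated <= 1.0 - truncation_threshold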
def testLoadBoundingBoxesDifficulty(self):
  metadata = kitti_metadata.KITTIMetadata()
  num_classes = len(metadata.ClassNames())
  test_data = self._GenerateMetricsWithTestData(num_classes)
  expected_num_objects = np.sum(test_data.expected_objects_at_distance,
                                axis=1)
  difficulty_metric = test_data.metrics._breakdown_metrics['difficulty']
  # Test if difficulties are properly accumulated.
  for d in metadata.DifficultyLevels().values():
    if d == 1:
      self.assertAllEqual(expected_num_objects,
                          difficulty_metric._histogram[d, :])
    else:
      self.assertAllEqual(np.zeros_like(expected_num_objects),
                          difficulty_metric._histogram[d, :])
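# For reference, the KITTI benchmark derives difficulty from exactly these
# three quantities. A sketch using the published thresholds (easy: height
# >= 40px, fully visible, truncation <= 0.15; moderate: >= 25px, at most
# partly occluded, truncation <= 0.30; hard: >= 25px, occlusion <= 2,
# truncation <= 0.50). The integer encoding below (1/2/3, 0 = ignored) is an
# assumption; the actual mapping lives in ComputeKITTIDifficulties.
def _sketch_kitti_difficulty(box_height_px, occlusion, truncation):
  """Returns 1 (easy), 2 (moderate), 3 (hard) or 0 (ignored); illustrative."""
  if box_height_px >= 40 and occlusion <= 0 and truncation <= 0.15:
    return 1
  if box_height_px >= 25 and occlusion <= 1 and truncation <= 0.30:
    return 2
  if box_height_px >= 25 and occlusion <= 2 and truncation <= 0.50:
    return 3
  return 0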
class KITTILabelExtractor(input_extractor.FieldsExtractor):
  """Extracts the object labels from a KITTI tf.Example.

  Emits:
    bboxes_count: Scalar number of 2D bounding boxes in the example.

    bboxes: [p.max_num_objects, 4] - 2D bounding box data in
    [ymin, xmin, ymax, xmax] format.

    bboxes_padding: [p.max_num_objects] - Padding for bboxes.

    bboxes_3d: [p.max_num_objects, 7] - 3D bounding box data in
    [x, y, z, dx, dy, dz, phi] format. x, y, z are the object center; dx, dy,
    dz are the dimensions of the box, and phi is the rotation angle around
    the z-axis. 3D bboxes are defined in the velodyne coordinate frame.

    bboxes_3d_mask: [p.max_num_objects] - Mask for bboxes (mask is the
    inversion of padding).

    bboxes3d_proj_to_image_plane: [p.max_num_objects, 8, 2] - For each
    bounding box, the 8 corners of the bounding box in projected image
    coordinates (x, y).

    bboxes_td: [p.max_num_objects, 4] - The 3D bounding box data in top-down
    projected coordinates (ymin, xmin, ymax, xmax). This currently ignores
    rotation.

    bboxes_td_mask: [p.max_num_objects] - Mask for bboxes_td.

    bboxes_3d_num_points: [p.max_num_objects] - Number of points in each box.

    labels: [p.max_num_objects] - Integer label for each bounding box object
    corresponding to the index in KITTI_CLASS_NAMES.

    texts: [p.max_num_objects] - The class name for each label in labels.

    source_id: Scalar string. The unique identifier for each example.

  See ComputeKITTIDifficulties for more info on the following:

    box_image_height: [p.max_num_objects] - The height in pixels of each box
    in the projected image plane.

    occlusion: [p.max_num_objects] - The occlusion level of each bounding
    box.

    truncation: [p.max_num_objects] - The truncation level of each bounding
    box.

    difficulties: [p.max_num_objects] - The computed difficulty based on the
    above three factors.
  """

  KITTI_CLASS_NAMES = kitti_metadata.KITTIMetadata().ClassNames()

  # Subclasses for filtering labels when training class-specific models.
  SUBCLASS_DICT = {
      'human': [4, 5],
      'cyclist': [6],
      'motor': [1, 2, 3, 7],
      'pedestrian': [4],
  }

  @classmethod
  def Params(cls):
    p = super(KITTILabelExtractor, cls).Params()
    p.Define('max_num_objects', 50, 'The number of objects per example.')
    p.Define(
        'filter_labels', None, 'If not None, specifies a list of label '
        'indices to keep.')
    return p

  def FeatureMap(self):
    return {
        'image/source_id': tf.io.FixedLenFeature((), tf.string, ''),
        'object/image/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'object/image/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'object/image/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'object/image/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'object/label': tf.io.VarLenFeature(tf.string),
        'object/has_3d_info': tf.io.VarLenFeature(dtype=tf.int64),
        'object/occlusion': tf.io.VarLenFeature(dtype=tf.int64),
        'object/truncation': tf.io.VarLenFeature(dtype=tf.float32),
        'object/velo/bbox/xyz': tf.io.VarLenFeature(dtype=tf.float32),
        'object/velo/bbox/dim_xyz': tf.io.VarLenFeature(dtype=tf.float32),
        'object/velo/bbox/phi': tf.io.VarLenFeature(dtype=tf.float32),
        'transform/velo_to_image_plane':
            tf.io.FixedLenFeature(shape=(3, 4), dtype=tf.float32),
    }

  def _Extract(self, features):
    p = self.params

    source_id = py_utils.HasShape(features['image/source_id'], [])
    xmin = _Dense(features['object/image/bbox/xmin'])
    xmax = _Dense(features['object/image/bbox/xmax'])
    ymin = _Dense(features['object/image/bbox/ymin'])
    ymax = _Dense(features['object/image/bbox/ymax'])

    # 2D bounding box in image coordinates.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
    bboxes_count = tf.shape(bboxes)[0]
    bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])
    bboxes_padding = 1.0 - py_utils.PadOrTrimTo(
        tf.ones([bboxes_count]), [p.max_num_objects])

    dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']), [-1, 3])
    loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
    phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])
    # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
    bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

    cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
    bboxes_td = tf.stack([
        cy - dy / 2,
        cx - dx / 2,
        cy + dy / 2,
        cx + dx / 2,
    ], axis=-1)  # pyformat: disable
    bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

    has_3d_info = tf.cast(_Dense(features['object/has_3d_info']), tf.float32)
    bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
    bboxes_td_mask = bboxes_3d_mask

    # Fill in difficulties from bounding box height, truncation and occlusion.
    bb_height = ymax - ymin
    box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
    box_image_height *= bboxes_3d_mask

    # 0 to 3 indicating occlusion level. 0 means fully visible, 1 means
    # partly occluded, 2 means largely occluded, and 3 means unknown.
    occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
    occlusion = tf.cast(occlusion, tf.float32)
    occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
    occlusion *= bboxes_3d_mask

    # Truncation: 0 -> not truncated, 1.0 -> truncated.
    truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
    truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
    truncation *= bboxes_3d_mask

    difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                            truncation)
    difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

    # Make a batch axis to call BBoxCorners, and take the first result back.
    bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

    # Project the 3D bbox to the image plane.
    velo_to_image_plane = features['transform/velo_to_image_plane']
    bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
        tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

    # Output is [num_objects, 8 corners per object, (x, y)].
    bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                              [-1, 8, 2])
    bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
        bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

    texts = features['object/label'].values
    labels = ops.static_map_string_int(x=texts, keys=self.KITTI_CLASS_NAMES)

    labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
    texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

    # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
    # bboxes_td_mask appropriately.
    if p.filter_labels is not None:
      valid_labels = tf.constant([p.filter_labels])
      bbox_mask = tf.reduce_any(
          tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
      bbox_mask = tf.cast(bbox_mask, tf.float32)
      bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
      filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
      bboxes_td_mask *= bbox_mask
    else:
      filtered_bboxes_3d_mask = bboxes_3d_mask

    # Placeholder for counting the number of laser points that reside within
    # each 3D bounding box. This must be filled in outside of this function
    # based on the loaded 3D laser points.
    bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
    bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                [p.max_num_objects])

    # Pad bboxes_3d.
    bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

    return py_utils.NestedMap(
        source_id=source_id,
        bboxes_count=bboxes_count,
        bboxes=bboxes,
        bboxes_padding=bboxes_padding,
        bboxes_3d=bboxes_3d,
        bboxes_3d_mask=filtered_bboxes_3d_mask,
        unfiltered_bboxes_3d_mask=bboxes_3d_mask,
        bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
        bboxes_td=bboxes_td,
        bboxes_td_mask=bboxes_td_mask,
        bboxes_3d_num_points=bboxes_3d_num_points,
        labels=labels,
        texts=texts,
        box_image_height=box_image_height,
        occlusion=occlusion,
        truncation=truncation,
        difficulties=difficulties)

  def Shape(self):
    p = self.params
    return py_utils.NestedMap(
        source_id=tf.TensorShape([]),
        bboxes_count=tf.TensorShape([]),
        bboxes=tf.TensorShape([p.max_num_objects, 4]),
        bboxes_padding=tf.TensorShape([p.max_num_objects]),
        bboxes_3d=tf.TensorShape([p.max_num_objects, 7]),
        bboxes_3d_mask=tf.TensorShape([p.max_num_objects]),
        unfiltered_bboxes_3d_mask=tf.TensorShape([p.max_num_objects]),
        bboxes3d_proj_to_image_plane=tf.TensorShape([p.max_num_objects, 8, 2]),
        bboxes_td=tf.TensorShape([p.max_num_objects, 4]),
        bboxes_td_mask=tf.TensorShape([p.max_num_objects]),
        bboxes_3d_num_points=tf.TensorShape([p.max_num_objects]),
        labels=tf.TensorShape([p.max_num_objects]),
        texts=tf.TensorShape([p.max_num_objects]),
        box_image_height=tf.TensorShape([p.max_num_objects]),
        occlusion=tf.TensorShape([p.max_num_objects]),
        truncation=tf.TensorShape([p.max_num_objects]),
        difficulties=tf.TensorShape([p.max_num_objects]))

  def DType(self):
    return py_utils.NestedMap(
        source_id=tf.string,
        bboxes_count=tf.int32,
        bboxes=tf.float32,
        bboxes_padding=tf.float32,
        bboxes_3d=tf.float32,
        bboxes_3d_mask=tf.float32,
        unfiltered_bboxes_3d_mask=tf.float32,
        bboxes3d_proj_to_image_plane=tf.float32,
        bboxes_td=tf.float32,
        bboxes_td_mask=tf.float32,
        bboxes_3d_num_points=tf.int32,
        labels=tf.int32,
        texts=tf.string,
        box_image_height=tf.float32,
        occlusion=tf.float32,
        truncation=tf.float32,
        difficulties=tf.int32)
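# As a plain-Python restatement of the bboxes_td computation in _Extract
# above: the top-down box is the axis-aligned footprint of the 3D box,
# deliberately ignoring the rotation phi (scalar version, illustrative only).
def _sketch_top_down_box(bbox_3d):
  """[x, y, z, dx, dy, dz, phi] -> (ymin, xmin, ymax, xmax), ignoring phi."""
  cx, cy, _, dx, dy, _, _ = bbox_3d
  return (cy - dy / 2.0, cx - dx / 2.0, cy + dy / 2.0, cx + dx / 2.0)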
def main(argv):
  if len(argv) > 1:
    raise tf.app.UsageError("Too many command-line arguments.")

  if FLAGS.decoder_path:
    assert not FLAGS.car_decoder_path and not FLAGS.ped_decoder_path and \
        not FLAGS.cyc_decoder_path, ("Either provide decoder_path or "
                                     "individual decoders but not both.")
  else:
    assert FLAGS.car_decoder_path and FLAGS.ped_decoder_path and \
        FLAGS.cyc_decoder_path, ("No decoder_path specified. Please supply "
                                 "all individual decoder_paths for labels.")

  is_single_decoder_file = FLAGS.decoder_path is not None
  if is_single_decoder_file:
    list_of_decoder_paths = [FLAGS.decoder_path]
  else:
    # Note the correspondence between _INCLUDED_KITTI_CLASS_NAMES ordering
    # and this list.
    list_of_decoder_paths = [
        FLAGS.car_decoder_path, FLAGS.ped_decoder_path, FLAGS.cyc_decoder_path
    ]

  # A list of dictionaries mapping img ids to a dictionary of numpy tensors.
  table_data = []

  img_ids = []
  for table_path in list_of_decoder_paths:
    img_id_dict = {}
    for serialized in tf.io.tf_record_iterator(table_path):
      record = record_pb2.Record()
      record.ParseFromString(serialized)
      img_id = str(tf.make_ndarray(record.fields["img_id"]))
      img_ids.append(img_id)
      np_dict = {k: tf.make_ndarray(v) for k, v in record.fields.items()}
      img_id_dict[img_id] = np_dict
    table_data.append(img_id_dict)

  img_ids = list(set(img_ids))

  if not tf.io.gfile.exists(FLAGS.output_dir):
    tf.io.gfile.mkdir(FLAGS.output_dir)

  all_kitti_class_names = kitti_metadata.KITTIMetadata().ClassNames()
  calib_data = LoadCalibData(tf.io.gfile.GFile(FLAGS.calib_file, "rb"))
  count = 0
  for img_id in img_ids:
    # Ignore padded samples where the img_ids are empty.
    if not img_id:
      continue
    for table_index, img_id_dict in enumerate(table_data):
      if img_id in img_id_dict:
        np_dict = img_id_dict[img_id]
        (location_cam, dimension_cam, rotation_cam, bboxes_2d, scores,
         class_ids) = ExtractNpContent(np_dict, calib_data[img_id + ".txt"])
        if is_single_decoder_file:
          valid_labels = _INCLUDED_KITTI_CLASS_NAMES
        else:
          valid_labels = [_INCLUDED_KITTI_CLASS_NAMES[table_index]]
        is_first = table_index == 0
        for class_name in valid_labels:
          class_mask = (
              class_ids == all_kitti_class_names.index(class_name))
          ExportKITTIDetection(FLAGS.output_dir, img_id,
                               location_cam[class_mask],
                               dimension_cam[class_mask],
                               rotation_cam[class_mask],
                               bboxes_2d[class_mask], scores[class_mask],
                               class_name, is_first)
    count += 1
  tf.logging.info("Total examples exported: %d", count)
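# ExportKITTIDetection (not shown here) presumably writes detections in the
# KITTI label-file layout: one line per object with type, truncated, occluded,
# alpha, the 2D box (left, top, right, bottom), dimensions (height, width,
# length), camera-frame location (x, y, z), rotation_y and score. A sketch of
# formatting one such line; the helper is hypothetical, and filling
# truncated/occluded/alpha with the conventional "unknown" sentinels
# (-1, -1, -10) is an assumption about the real exporter.
def _sketch_kitti_detection_line(class_name, bbox_2d, dim_hwl, loc_cam_xyz,
                                 rotation_y, score):
  """Formats one detection in the KITTI label-file layout (illustrative)."""
  left, top, right, bottom = bbox_2d
  fields = ([class_name, -1, -1, -10.0] +  # truncated, occluded, alpha
            [left, top, right, bottom] +
            list(dim_hwl) + list(loc_cam_xyz) +
            [rotation_y, score])
  return " ".join(str(f) for f in fields)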
def _GenerateMetricsWithTestData(self, num_classes):
  metadata = kitti_metadata.KITTIMetadata()
  num_bins_of_distance = int(
      np.rint(metadata.MaximumDistance() / metadata.DistanceBinWidth()))
  num_bins_of_rotation = metadata.NumberOfRotationBins()
  num_bins_of_points = metadata.NumberOfPointsBins()

  # Generate ground truth bounding boxes with prescribed labels, distances,
  # rotations and numbers of points.
  expected_objects_at_distance = np.random.randint(
      low=0, high=8, size=(num_classes, num_bins_of_distance), dtype=np.int32)
  expected_objects_at_rotation = np.zeros(
      shape=(num_classes, num_bins_of_rotation), dtype=np.int32)

  # Note that we need to preserve the same number of objects for each label.
  expected_objects_at_points = np.zeros(
      shape=(num_classes, num_bins_of_points), dtype=np.int32)
  prob = 1.0 / float(num_bins_of_points)
  for c in range(num_classes):
    num_objects_for_class = np.sum(expected_objects_at_distance[c, :])
    expected_objects_at_points[c, :] = np.random.multinomial(
        num_objects_for_class, pvals=num_bins_of_points * [prob])

  # Zero out the number of boxes in the background class.
  expected_objects_at_distance[0, :] = 0
  expected_objects_at_points[0, :] = 0
  expected_objects_at_rotation[0, :] = 0

  bboxes = []
  labels = []
  num_points = []
  bin_width = (
      metadata.MaximumRotation() / float(metadata.NumberOfRotationBins()))
  # Note that we always skip 'Background' class 0.
  for label in range(1, num_classes):
    for distance_index in range(num_bins_of_distance):
      distance = (
          distance_index * metadata.DistanceBinWidth() +
          metadata.DistanceBinWidth() / 2.0)
      num_box = expected_objects_at_distance[label, distance_index]
      if num_box > 0:
        rotation_index = np.random.randint(num_bins_of_rotation)
        expected_objects_at_rotation[label, rotation_index] += num_box
        rotation = rotation_index * bin_width + bin_width / 2.0
        bboxes.append(
            self._GenerateBBoxesAtDistanceAndRotation(num_box, distance,
                                                      rotation))
        labels.append(label * np.ones(shape=[num_box], dtype=np.int32))

    point_bin_edges = np.logspace(
        np.log10(1.0), np.log10(metadata.MaximumNumberOfPoints()),
        metadata.NumberOfPointsBins() + 1)
    for point_index in range(num_bins_of_points):
      num_box = expected_objects_at_points[label, point_index]
      for _ in range(num_box):
        points = (point_bin_edges[point_index] +
                  point_bin_edges[point_index + 1]) / 2.0
        num_points.append([points])

  bboxes = np.concatenate(bboxes)
  labels = np.concatenate(labels)
  num_points = np.concatenate(num_points)

  # Generate dummy predictions as placeholders for the API.
  num_predictions = 9
  prediction_scores = np.random.uniform(size=[num_classes, num_predictions])
  prediction_bboxes = self._GenerateRandomBBoxes(
      num_predictions * num_classes).reshape(
          (num_classes, num_predictions, 7))

  # Update the metrics.
  metric_names = ['rotation', 'num_points', 'distance']
  ap_params = kitti_ap_metric.KITTIAPMetrics.Params(metadata).Set(
      breakdown_metrics=metric_names)
  metrics = ap_params.Instantiate()
  metrics.Update(
      'dummy_image1',
      py_utils.NestedMap(
          groundtruth_labels=labels,
          groundtruth_bboxes=bboxes,
          groundtruth_difficulties=np.ones(shape=(bboxes.shape[0])),
          groundtruth_num_points=num_points,
          detection_scores=prediction_scores,
          detection_boxes=prediction_bboxes,
          detection_heights_in_pixels=np.ones(
              shape=prediction_bboxes.shape[0:2]) * 100))
  return py_utils.NestedMap(
      metrics=metrics,
      expected_objects_at_distance=expected_objects_at_distance,
      expected_objects_at_points=expected_objects_at_points,
      expected_objects_at_rotation=expected_objects_at_rotation)