def convert_detections(table_path):
  """Converts detections in `table_path` to metric format.

  Args:
    table_path: Path to TFRecord file of decoder outputs.

  Returns:
    (preds, gts): metrics_pb2.Objects() of predictions and groundtruths.
  """
  img_ids = []
  img_id_dict = {}
  for serialized in tf.io.tf_record_iterator(table_path):
    record = record_pb2.Record()
    record.ParseFromString(serialized)
    img_id = str(tf.make_ndarray(record.fields["frame_id"]))
    img_ids.append(img_id)
    np_dict = {k: tf.make_ndarray(v) for k, v in record.fields.items()}
    img_id_dict[img_id] = np_dict

  preds = metrics_pb2.Objects()
  gts = metrics_pb2.Objects()
  for img_id in img_ids:
    # Extract the underlying context string and timestamp from the image id.
    # `img_id` is the string repr of a bytes object (e.g., "b'context_123'"),
    # so the slicing below strips the leading "b'" and the trailing "'".
    #
    # TODO(vrv): Consider embedding these values into the decoder output
    # individually.
    context_name = img_id[2:img_id.rindex("_")]
    timestamp = int(img_id[img_id.rindex("_") + 1:-1])

    np_dict = img_id_dict[img_id]
    pred_bboxes = np_dict["bboxes"]  # [max_boxes, 7]
    pred_scores = np_dict["scores"]  # [max_boxes]
    gt_bboxes = np_dict["gt_bboxes"]  # [num_gt_boxes, 7]
    gt_labels = np_dict["gt_labels"]  # [num_gt_boxes]
    class_ids = np_dict["class_ids"]  # [max_boxes]

    def _add_box(label, box_vec):
      """Copies a 7-DOF box vector [x, y, z, l, w, h, heading] into `label`."""
      label.box.center_x = box_vec[0]
      label.box.center_y = box_vec[1]
      label.box.center_z = box_vec[2]
      label.box.length = box_vec[3]
      label.box.width = box_vec[4]
      label.box.height = box_vec[5]
      label.box.heading = box_vec[6]

    num_gts = gt_bboxes.shape[0]
    for gt_idx in range(num_gts):
      gt_object = metrics_pb2.Object()
      gt_object.context_name = context_name
      gt_object.frame_timestamp_micros = timestamp
      label = gt_object.object
      _add_box(label, gt_bboxes[gt_idx])
      label.type = gt_labels[gt_idx]
      # We should fill in the difficulty level once we want to measure the
      # breakdown by LEVEL.
      label.detection_difficulty_level = 0
      gts.objects.append(gt_object)

    num_pds = pred_bboxes.shape[0]
    for pd_idx in range(num_pds):
      score = pred_scores[pd_idx]
      # Skip predictions below the score threshold.
      if score < FLAGS.score_threshold:
        continue
      pd_object = metrics_pb2.Object()
      pd_object.context_name = context_name
      pd_object.frame_timestamp_micros = timestamp
      pd_object.score = score
      label = pd_object.object
      _add_box(label, pred_bboxes[pd_idx])
      label.type = class_ids[pd_idx]
      preds.objects.append(pd_object)

  return preds, gts
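
# A minimal usage sketch for `convert_detections` (an illustration, not part
# of the original tool): any protobuf message, including metrics_pb2.Objects,
# supports SerializeToString(), so the returned predictions and groundtruths
# can be written out for downstream metric computation. The helper name
# `_write_objects` and the example paths are assumptions.
def _write_objects(objects, path):
  """Writes a serialized metrics_pb2.Objects proto to `path`."""
  with tf.io.gfile.GFile(path, "wb") as f:
    f.write(objects.SerializeToString())

# Example:
#   preds, gts = convert_detections(FLAGS.decoder_path)
#   _write_objects(preds, "/tmp/preds.bin")
#   _write_objects(gts, "/tmp/gts.bin")
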
def main(argv):
  if len(argv) > 1:
    raise tf.app.UsageError("Too many command-line arguments.")

  if FLAGS.decoder_path:
    assert not FLAGS.car_decoder_path and not FLAGS.ped_decoder_path \
        and not FLAGS.cyc_decoder_path, ("Either provide decoder_path or "
                                         "individual decoders but not both.")
  else:
    assert FLAGS.car_decoder_path and FLAGS.ped_decoder_path and \
        FLAGS.cyc_decoder_path, ("No decoder_path specified. Please supply "
                                 "all individual decoder_paths for labels.")

  is_single_decoder_file = FLAGS.decoder_path is not None
  if is_single_decoder_file:
    list_of_decoder_paths = [FLAGS.decoder_path]
  else:
    # Note the correspondence between _INCLUDED_KITTI_CLASS_NAMES ordering and
    # this list.
    list_of_decoder_paths = [
        FLAGS.car_decoder_path, FLAGS.ped_decoder_path, FLAGS.cyc_decoder_path
    ]

  # A list of dictionaries mapping img ids to a dictionary of numpy tensors.
  table_data = []
  img_ids = []
  for table_path in list_of_decoder_paths:
    img_id_dict = {}
    for serialized in tf.io.tf_record_iterator(table_path):
      record = record_pb2.Record()
      record.ParseFromString(serialized)
      img_id = str(tf.make_ndarray(record.fields["img_id"]))
      img_ids.append(img_id)
      np_dict = {k: tf.make_ndarray(v) for k, v in record.fields.items()}
      img_id_dict[img_id] = np_dict
    table_data.append(img_id_dict)

  # Deduplicate ids that appear in more than one decoder table.
  img_ids = list(set(img_ids))

  if not tf.io.gfile.exists(FLAGS.output_dir):
    tf.io.gfile.mkdir(FLAGS.output_dir)

  all_kitti_class_names = kitti_metadata.KITTIMetadata().ClassNames()
  calib_data = LoadCalibData(tf.io.gfile.GFile(FLAGS.calib_file, "rb"))

  count = 0
  for img_id in img_ids:
    # Ignore padded samples where the img_ids are empty.
    if not img_id:
      continue
    for table_index, img_id_dict in enumerate(table_data):
      if img_id in img_id_dict:
        np_dict = img_id_dict[img_id]
        (location_cam, dimension_cam, rotation_cam, bboxes_2d, scores,
         class_ids) = ExtractNpContent(np_dict, calib_data[img_id + ".txt"])
        if is_single_decoder_file:
          valid_labels = _INCLUDED_KITTI_CLASS_NAMES
        else:
          valid_labels = [_INCLUDED_KITTI_CLASS_NAMES[table_index]]
        is_first = table_index == 0
        for class_name in valid_labels:
          class_mask = (
              class_ids == all_kitti_class_names.index(class_name))
          ExportKITTIDetection(FLAGS.output_dir, img_id,
                               location_cam[class_mask],
                               dimension_cam[class_mask],
                               rotation_cam[class_mask],
                               bboxes_2d[class_mask], scores[class_mask],
                               class_name, is_first)
    count += 1
  tf.logging.info("Total examples exported: %d", count)
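
# A sketch of the flag definitions and entry point this script assumes, kept
# commented out because the real definitions live elsewhere in the file; the
# flag names are inferred from the FLAGS usages above, while defaults and help
# strings are illustrative only.
#
# FLAGS = tf.flags.FLAGS
# tf.flags.DEFINE_string("decoder_path", None,
#                        "Decoder output TFRecord covering all classes.")
# tf.flags.DEFINE_string("car_decoder_path", None, "Car-only decoder output.")
# tf.flags.DEFINE_string("ped_decoder_path", None,
#                        "Pedestrian-only decoder output.")
# tf.flags.DEFINE_string("cyc_decoder_path", None,
#                        "Cyclist-only decoder output.")
# tf.flags.DEFINE_string("output_dir", None,
#                        "Directory to write KITTI-format detection files.")
# tf.flags.DEFINE_string("calib_file", None, "Path to KITTI calibration data.")
# tf.flags.DEFINE_float("score_threshold", 0.0,
#                       "Minimum score for an exported detection.")
#
# if __name__ == "__main__":
#   tf.app.run(main)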