def convert_detections(table_path):
  """Convert detections in `table_path` to metric format.

  Args:
    table_path: Path to TFRecord file of decoder outputs.

  Returns:
    (preds, gts): metrics_pb2.Objects() messages of predictions and groundtruths.
  """
  img_ids = []
  img_id_dict = {}
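  # Note: tf.io.tf_record_iterator is a deprecated TF1 API; under TF2 it is
  # only available as tf.compat.v1.io.tf_record_iterator.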
  for serialized in tf.io.tf_record_iterator(table_path):
    record = record_pb2.Record()
    record.ParseFromString(serialized)
    img_id = str(tf.make_ndarray(record.fields["frame_id"]))
    img_ids.append(img_id)
    np_dict = {k: tf.make_ndarray(v) for k, v in record.fields.items()}
    img_id_dict[img_id] = np_dict

  preds = metrics_pb2.Objects()
  gts = metrics_pb2.Objects()
  for img_id in img_ids:
    # Extract the underlying context string and timestamp
    # from the image id.
    #
    # TODO(vrv): Consider embedding these values into the decoder output
    # individually.
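    # `img_id` is the `str()` of a numpy bytes scalar, typically of the form
    # "b'<context_name>_<timestamp>'", so the slices below strip the leading
    # "b'" and the trailing "'" while splitting on the final underscore.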
    context_name = img_id[2:img_id.rindex("_")]
    timestamp = int(img_id[img_id.rindex("_") + 1:-1])

    np_dict = img_id_dict[img_id]
    pred_bboxes = np_dict["bboxes"]  # [max_boxes, 7]
    pred_scores = np_dict["scores"]  # [max_boxes]
    gt_bboxes = np_dict["gt_bboxes"]  # [num_gt_boxes, 7]
    gt_labels = np_dict["gt_labels"]  # [num_gt_boxes]
    class_ids = np_dict["class_ids"]  # [max_boxes]

    def _add_box(label, box_vec):
      """Copies a 7-DOF box (x, y, z, l, w, h, heading) into label.box."""
      label.box.center_x = box_vec[0]
      label.box.center_y = box_vec[1]
      label.box.center_z = box_vec[2]
      label.box.length = box_vec[3]
      label.box.width = box_vec[4]
      label.box.height = box_vec[5]
      label.box.heading = box_vec[6]

    num_gts = gt_bboxes.shape[0]
    for gt_idx in range(num_gts):
      gt_object = metrics_pb2.Object()
      gt_object.context_name = context_name
      gt_object.frame_timestamp_micros = timestamp
      label = gt_object.object
      _add_box(label, gt_bboxes[gt_idx])
      label.type = gt_labels[gt_idx]
      # We should fill in the difficulty level once we want to measure the
      # breakdown by LEVEL.
      label.detection_difficulty_level = 0
      gts.objects.append(gt_object)

    num_pds = pred_bboxes.shape[0]
    for pd_idx in range(num_pds):
      score = pred_scores[pd_idx]
      if score < FLAGS.score_threshold:
        continue
      pd_object = metrics_pb2.Object()
      pd_object.context_name = context_name
      pd_object.frame_timestamp_micros = timestamp
      pd_object.score = score
      label = pd_object.object
      _add_box(label, pred_bboxes[pd_idx])
      label.type = class_ids[pd_idx]
      preds.objects.append(pd_object)

  return preds, gts
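A minimal usage sketch (not part of the original example; the paths are
hypothetical): the returned protos are ordinary metrics_pb2.Objects messages,
so they can be serialized to the binary files that the Waymo Open Dataset
metrics tooling (e.g. its compute_detection_metrics_main binary) consumes.

preds, gts = convert_detections("/tmp/decoder_out.tfrecord")
with tf.io.gfile.GFile("/tmp/preds.bin", "wb") as f:
  f.write(preds.SerializeToString())
with tf.io.gfile.GFile("/tmp/gts.bin", "wb") as f:
  f.write(gts.SerializeToString())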
Example #2
def main(argv):
    """Exports decoder outputs as per-class KITTI detection files."""
    if len(argv) > 1:
        raise tf.app.UsageError("Too many command-line arguments.")

    if FLAGS.decoder_path:
        assert not (FLAGS.car_decoder_path or FLAGS.ped_decoder_path or
                    FLAGS.cyc_decoder_path), (
                        "Provide either decoder_path or the individual "
                        "decoder paths, but not both.")
    else:
        assert (FLAGS.car_decoder_path and FLAGS.ped_decoder_path and
                FLAGS.cyc_decoder_path), (
                    "No decoder_path specified. Please supply all individual "
                    "decoder_paths for the labels.")
    is_single_decoder_file = FLAGS.decoder_path is not None

    if is_single_decoder_file:
        list_of_decoder_paths = [FLAGS.decoder_path]
    else:
        # Note the correspondence between _INCLUDED_KITTI_CLASS_NAMES ordering and
        # this list.
        list_of_decoder_paths = [
            FLAGS.car_decoder_path, FLAGS.ped_decoder_path,
            FLAGS.cyc_decoder_path
        ]

    # A list of dictionaries, one per decoder table, each mapping img_ids to a
    # dictionary of numpy arrays.
    table_data = []

    img_ids = []
    for table_path in list_of_decoder_paths:
        img_id_dict = {}
        for serialized in tf.io.tf_record_iterator(table_path):
            record = record_pb2.Record()
            record.ParseFromString(serialized)
            img_id = str(tf.make_ndarray(record.fields["img_id"]))
            img_ids.append(img_id)
            np_dict = {k: tf.make_ndarray(v) for k, v in record.fields.items()}
            img_id_dict[img_id] = np_dict
        table_data.append(img_id_dict)
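    # Deduplicate ids seen across the per-class decoder tables; note that
    # going through set() leaves the resulting order arbitrary.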
    img_ids = list(set(img_ids))

    if not tf.io.gfile.exists(FLAGS.output_dir):
        tf.io.gfile.mkdir(FLAGS.output_dir)

    all_kitti_class_names = kitti_metadata.KITTIMetadata().ClassNames()
    calib_data = LoadCalibData(tf.io.gfile.GFile(FLAGS.calib_file, "rb"))
    count = 0
    for img_id in img_ids:
        # Ignore padded samples where the img_ids are empty.
        if not img_id:
            continue
        for table_index, img_id_dict in enumerate(table_data):
            if img_id in img_id_dict:
                np_dict = img_id_dict[img_id]

                (location_cam, dimension_cam, rotation_cam, bboxes_2d, scores,
                 class_ids) = ExtractNpContent(np_dict,
                                               calib_data[img_id + ".txt"])
                if is_single_decoder_file:
                    valid_labels = _INCLUDED_KITTI_CLASS_NAMES
                else:
                    valid_labels = [_INCLUDED_KITTI_CLASS_NAMES[table_index]]
                is_first = table_index == 0
                for class_name in valid_labels:
                    class_mask = (
                        class_ids == all_kitti_class_names.index(class_name))
                    ExportKITTIDetection(
                        FLAGS.output_dir, img_id, location_cam[class_mask],
                        dimension_cam[class_mask], rotation_cam[class_mask],
                        bboxes_2d[class_mask], scores[class_mask], class_name,
                        is_first)
        count += 1
    tf.logging.info("Total examples exported: %d", count)
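For context, here is a sketch of the flag definitions and entry point that
main assumes. The flag names match the FLAGS attributes used above, but the
exact declarations (defaults, help strings) in the original module may differ.

from absl import app, flags

flags.DEFINE_string("decoder_path", None,
                    "Single decoder TFRecord covering all classes.")
flags.DEFINE_string("car_decoder_path", None, "Car-only decoder TFRecord.")
flags.DEFINE_string("ped_decoder_path", None,
                    "Pedestrian-only decoder TFRecord.")
flags.DEFINE_string("cyc_decoder_path", None, "Cyclist-only decoder TFRecord.")
flags.DEFINE_string("calib_file", None, "Path to the KITTI calibration file.")
flags.DEFINE_string("output_dir", None,
                    "Directory to write the KITTI detection files to.")

FLAGS = flags.FLAGS

if __name__ == "__main__":
    app.run(main)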