def produce_tensorflow_detections(detection_files, tf_inference_wrapper,
                                  batch_size, image_numbers, image_path):
    """Fetches output from Tensorflow model, and saves it to results files.

    The output of the Tensorflow model is an output_dict Python
    dictionary, indexed per image of the batch, with the following fields:
        num_detections: number of detections to read for each image
        detection_classes: labels of detected classes
        detection_scores: confidences for detections
        detection_boxes: bounding box coordinates for detections,
            in format (ymin, xmin, ymax, xmax)

    This function iterates over all VOC images in batches of at most
    batch_size, feeds each batch into the Tensorflow model, and for every
    detection with a confidence bigger than zero whose COCO label
    converts to a VOC label, builds a Detection object and writes it
    to the result file of that VOC class.

    Args:
        detection_files (dict): dictionary that maps class labels to
            class result files
        tf_inference_wrapper (inference_utils.TensorflowInference):
            internal Python class wrapping Tensorflow inference
            setup/run code
        batch_size (int): batch size used for inference
        image_numbers [str]: VOC image numbers to use for inference
        image_path (str): Python string, which stores path to VOC image file,
            when you do image_path.format(voc_image_number)
    """
    total_imgs = len(image_numbers)
    for idx in range(0, total_imgs, batch_size):
        print("Infering image {}/{}".format(idx + 1, total_imgs))

        imgs = image_numbers[idx:idx + batch_size]
        image_paths = [image_path.format(img) for img in imgs]
        output_dict = tf_inference_wrapper.infer_batch(image_paths)

        keep_count = output_dict['num_detections']
        for img_idx, img_number in enumerate(imgs):
            for det in range(int(keep_count[img_idx])):
                # Detection is saved only if confidence is bigger than zero.
                # Written as "not >" so a NaN score is skipped as well.
                confidence = output_dict['detection_scores'][img_idx][det]
                if not confidence > 0.0:
                    continue

                # Model was trained on COCO, so we need to convert
                # the label to the VOC one
                label = output_dict['detection_classes'][img_idx][det]
                label_name = voc_utils.coco_label_to_voc_label(
                    COCO_LABELS[label])
                if not label_name:
                    # Conversion gave no usable VOC label: nothing to save
                    continue

                # Output bounding boxes are in [0, 1] format, here we
                # rescale them to pixel coordinates of the model input.
                # This is done only for detections that are really saved.
                ymin, xmin, ymax, xmax = \
                    output_dict['detection_boxes'][img_idx][det]
                width = model_utils.ModelData.get_input_width()
                height = model_utils.ModelData.get_input_height()

                detection = Detection(
                    img_number,
                    confidence,
                    float(xmin) * width,
                    float(ymin) * height,
                    float(xmax) * width,
                    float(ymax) * height,
                )
                detection.write_to_file(detection_files[label_name])
def produce_tensorrt_detections(detection_files, trt_inference_wrapper,
                                max_batch_size, image_numbers, image_path):
    """Fetches output from TensorRT model, and saves it to results files.

    The output of TensorRT model is a pair of:
      * location byte array that contains detection metadata,
        which is laid out according to TRT_PREDICTION_LAYOUT
      * number of detections returned by NMS

    TRT_PREDICTION_LAYOUT fields correspond to Tensorflow ones as follows:
      label -> detection_classes
      confidence -> detection_scores
      xmin, ymin, xmax, ymax -> detection_boxes

    The number of detections corresponds to the num_detections
    Tensorflow output.

    Tensorflow output semantics is more thoroughly explained in
    produce_tensorflow_detections().

    This function iterates over all VOC images in batches of at most
    max_batch_size, feeds each batch into the TensorRT model, and for
    every detection with a confidence bigger than zero whose COCO label
    converts to a VOC label, builds a Detection object and writes it
    to the result file of that VOC class.

    Args:
        detection_files (dict): dictionary that maps class labels to
            class result files
        trt_inference_wrapper (inference_utils.TRTInference):
            internal Python class wrapping TensorRT inference
            setup/run code
        max_batch_size (int): maximum number of images fed to the model
            in one inference call (the last batch may be smaller)
        image_numbers [str]: VOC image numbers to use for inference
        image_path (str): Python string, which stores path to VOC image file,
            when you do image_path.format(voc_image_number)
    """
    total_imgs = len(image_numbers)
    for idx in range(0, total_imgs, max_batch_size):
        imgs = image_numbers[idx:idx + max_batch_size]
        print("Infering image {}/{}".format(idx + 1, total_imgs))
        image_paths = [image_path.format(img) for img in imgs]
        detections, keep_count = trt_inference_wrapper.infer_batch(image_paths)

        # Number of values that describe a single detection
        prediction_fields = len(TRT_PREDICTION_LAYOUT)
        for img_idx, img_number in enumerate(imgs):
            # NOTE(review): the offset of an image's predictions is computed
            # with this image's own keep_count as the per-image stride. That
            # is only right if every image occupies keep_count[img_idx]
            # detection slots in `detections` - confirm against the buffer
            # layout returned by infer_batch() for batches larger than one.
            img_predictions_start_idx = prediction_fields * keep_count[
                img_idx] * img_idx
            for det in range(int(keep_count[img_idx])):
                # The first field of a prediction is not needed here
                _, label, confidence, xmin, ymin, xmax, ymax = \
                    analyze_tensorrt_prediction(
                        detections,
                        img_predictions_start_idx + det * prediction_fields)

                # Detection is saved only if confidence is bigger than zero.
                # Written as "not >" so a NaN score is skipped as well.
                if not confidence > 0.0:
                    continue

                # Model was trained on COCO, so we need to convert
                # the label to the VOC one
                label_name = voc_utils.coco_label_to_voc_label(
                    COCO_LABELS[label])
                if not label_name:
                    # Conversion gave no usable VOC label: nothing to save
                    continue

                detection = Detection(
                    img_number,
                    confidence,
                    xmin,
                    ymin,
                    xmax,
                    ymax,
                )
                detection.write_to_file(detection_files[label_name])