def produce_tensorflow_detections(detection_files, tf_inference_wrapper,
                                  batch_size, image_numbers, image_path):
    """Fetches output from Tensorflow model, and saves it to results file.

    The format of output from Tensorflow is output_dict Python
    dictionary containing following fields:
        num_detections: maximum number of detections kept per image
        detection_classes: label of classes detected
        detection_scores: confidences for detections
        detection_boxes: bounding box coordinates for detections,
            in format (ymin, xmin, ymax, xmax)

    This function iterates over all VOC images in batches, feeding each
    batch into the Tensorflow model, fetching object detections from each
    output, converting them to Detection objects, and saving them to the
    detection result file of the matching class.

    Args:
        detection_files (dict): dictionary that maps class labels to
            class result files
        tf_inference_wrapper (inference_utils.TensorflowInference):
            internal Python class wrapping Tensorflow inference
            setup/run code
        batch_size (int): batch size used for inference
        image_numbers (list of str): VOC image numbers to use for inference
        image_path (str): Python string, which stores path to VOC image
            file, when you do image_path.format(voc_image_number)
    """
    total_imgs = len(image_numbers)

    # The model input size is the same for every detection, so it is looked
    # up once here instead of four times per bounding box.
    input_width = model_utils.ModelData.get_input_width()
    input_height = model_utils.ModelData.get_input_height()

    for idx in range(0, total_imgs, batch_size):
        print("Infering image {}/{}".format(idx + 1, total_imgs))
        imgs = image_numbers[idx:idx + batch_size]
        image_paths = [image_path.format(img) for img in imgs]
        output_dict = tf_inference_wrapper.infer_batch(image_paths)

        keep_count = output_dict['num_detections']
        classes = output_dict['detection_classes']
        scores = output_dict['detection_scores']
        boxes = output_dict['detection_boxes']

        for img_idx, img_number in enumerate(imgs):
            for det in range(int(keep_count[img_idx])):
                confidence = scores[img_idx][det]

                # Detection is saved only if confidence is bigger than zero.
                # Written as "not >" so the set of accepted scores is exactly
                # the one accepted by `confidence > 0.0`.
                if not confidence > 0.0:
                    continue

                # Model was trained on COCO, so we need
                # to convert label to VOC one
                label = classes[img_idx][det]
                label_name = voc_utils.coco_label_to_voc_label(
                    COCO_LABELS[label])
                if not label_name:
                    # No usable VOC label for this COCO class; skip it.
                    continue

                # Output bounding boxes are in [0, 1] format, here we
                # rescale them to pixel coordinates of the model input.
                ymin, xmin, ymax, xmax = boxes[img_idx][det]
                xmin = float(xmin) * input_width
                ymin = float(ymin) * input_height
                xmax = float(xmax) * input_width
                ymax = float(ymax) * input_height

                detection = Detection(
                    img_number,
                    confidence,
                    xmin,
                    ymin,
                    xmax,
                    ymax,
                )
                detection.write_to_file(detection_files[label_name])
def produce_tensorrt_detections(detection_files, trt_inference_wrapper,
                                max_batch_size, image_numbers, image_path):
    """Fetches output from TensorRT model, and saves it to results file.

    The output of TensorRT model is a pair of:
      * location byte array that contains detection metadata,
        which is laid out according to TRT_PREDICTION_LAYOUT
      * number of detections returned by NMS

    TRT_PREDICTION_LAYOUT fields correspond to Tensorflow ones as follows:
        label -> detection_classes
        confidence -> detection_scores
        xmin, ymin, xmax, ymax -> detection_boxes

    The number of detections corresponds to the num_detections Tensorflow
    output.

    Tensorflow output semantics is more thoroughly explained in
    produce_tensorflow_detections().

    This function iterates over all VOC images in batches, feeding each
    batch into the TensorRT model, fetching object detections from each
    output, converting them to Detection objects, and saving them to the
    detection result file of the matching class.

    Args:
        detection_files (dict): dictionary that maps class labels to
            class result files
        trt_inference_wrapper (inference_utils.TRTInference):
            internal Python class wrapping TensorRT inference
            setup/run code
        max_batch_size (int): maximum number of images fed to the model
            in one inference call; the last batch may be smaller
        image_numbers (list of str): VOC image numbers to use for inference
        image_path (str): Python string, which stores path to VOC image
            file, when you do image_path.format(voc_image_number)
    """
    total_imgs = len(image_numbers)
    # Number of values stored per detection; constant for the whole run.
    prediction_fields = len(TRT_PREDICTION_LAYOUT)

    for idx in range(0, total_imgs, max_batch_size):
        imgs = image_numbers[idx:idx + max_batch_size]
        print("Infering image {}/{}".format(idx + 1, total_imgs))
        image_paths = [image_path.format(img) for img in imgs]
        detections, keep_count = trt_inference_wrapper.infer_batch(image_paths)

        for img_idx, img_number in enumerate(imgs):
            img_keep_count = keep_count[img_idx]
            # NOTE(review): the offset of this image's predictions is derived
            # from its own keep count, which is only the right stride if
            # every image in the batch reports the same count (presumably the
            # NMS keep-top-K value). Confirm against the layout of the
            # buffer returned by infer_batch().
            img_predictions_start_idx = \
                prediction_fields * img_keep_count * img_idx

            for det in range(int(img_keep_count)):
                # The first field of the prediction is not needed here.
                _, label, confidence, xmin, ymin, xmax, ymax = \
                    analyze_tensorrt_prediction(
                        detections,
                        img_predictions_start_idx + det * prediction_fields)

                # Detection is saved only if confidence is bigger than zero.
                # Written as "not >" so the set of accepted scores is exactly
                # the one accepted by `confidence > 0.0`.
                if not confidence > 0.0:
                    continue

                # Convert the COCO label to the VOC one; skip the detection
                # when the conversion yields no usable label.
                label_name = voc_utils.coco_label_to_voc_label(
                    COCO_LABELS[label])
                if not label_name:
                    continue

                detection = Detection(
                    img_number,
                    confidence,
                    xmin,
                    ymin,
                    xmax,
                    ymax,
                )
                detection.write_to_file(detection_files[label_name])