def predict():
    # load raw images
    raw_data = flask.request.get_data()
    inputs = bytes_to_arrays(raw_data)
    print(f"Received {len(inputs)} images from client")

    # pre-processing
    preprocess_start_time = time.time()
    if not args.quantized_inputs:
        inputs = [inputs[0].astype(numpy.float32) / 255.0]
    preprocess_time = time.time() - preprocess_start_time
    print(f"Pre-processing time: {preprocess_time * 1000.0:.4f}ms")

    # inference
    print("Executing model")
    outputs, elapsed_time = engine.timed_run(inputs)
    print(f"Inference time: {elapsed_time * 1000.0:.4f}ms")

    # post-processing
    postprocess_start_time = time.time()
    outputs = postprocessor.pre_nms_postprocess(outputs)
    postprocess_time = time.time() - postprocess_start_time
    print(f"Post-processing, pre-nms time: {postprocess_time * 1000.0:.4f}ms")

    # NMS
    nms_start_time = time.time()
    outputs = postprocess_nms(outputs)
    nms_time = time.time() - nms_start_time
    print(f"nms time: {nms_time * 1000.0:.4f}ms")

    return arrays_to_bytes(outputs)
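For reference, a client can call this predict endpoint by serializing its images with the same helpers the server uses. The sketch below is illustrative rather than part of the original server: it assumes arrays_to_bytes and bytes_to_arrays are importable on the client side and that the server is reachable at a hypothetical http://localhost:5543/predict.

def example_client(image_arrays, url="http://localhost:5543/predict"):
    # serialize the input arrays, POST them, and decode the returned detections
    import requests

    response = requests.post(url, data=arrays_to_bytes(image_arrays))
    return bytes_to_arrays(response.content)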
def annotate(args):
    save_dir = _get_save_dir(args)
    model = _load_model(args)
    loader, saver, is_video = get_yolo_loader_and_saver(
        args.source, save_dir, args.image_shape, args
    )
    is_webcam = args.source.isnumeric()
    postprocessor = (
        YoloPostprocessor(args.image_shape)
        if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]
        else None
    )

    for iteration, (inp, source_img) in enumerate(loader):
        if args.device not in ["cpu", None]:
            # wait for pending GPU work so the timing below is accurate
            torch.cuda.synchronize()

        iter_start = time.time()

        # pre-processing
        batch = _preprocess_batch(args, inp)

        # inference
        outputs = _run_model(args, model, batch)

        # post-processing
        if postprocessor:
            outputs = postprocessor.pre_nms_postprocess(outputs)

        # NMS; [0] selects the detections for the single image in the batch
        outputs = postprocess_nms(outputs)[0]

        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()

        # annotate; prefer the user-supplied target FPS, otherwise use the
        # measured per-frame rate (video sources only)
        measured_fps = (
            (args.target_fps or (1.0 / (time.time() - iter_start)))
            if is_video
            else None
        )
        annotated_img = annotate_image(
            source_img,
            outputs,
            model_input_size=args.image_shape,
            images_per_sec=measured_fps,
        )

        # display
        if is_webcam:
            cv2.imshow("annotations", annotated_img)
            cv2.waitKey(1)

        # save
        if saver:
            saver.save_frame(annotated_img)

        iter_end = time.time()
        elapsed_time = 1000 * (iter_end - iter_start)
        _LOGGER.info(f"Inference {iteration} processed in {elapsed_time} ms")

    if saver:
        saver.close()

    _LOGGER.info(f"Results saved to {save_dir}")
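annotate reads its configuration from a single args object rather than individual parameters. A minimal, hypothetical invocation might look like the sketch below; only the attributes read directly inside annotate are shown, and the private helpers (_get_save_dir, _load_model, _preprocess_batch, _run_model) will typically require additional fields, so treat the names and values as placeholders.

from types import SimpleNamespace

args = SimpleNamespace(
    source="input.mp4",        # video path, image directory, or a numeric webcam id such as "0"
    image_shape=[640, 640],    # model input size
    engine=DEEPSPARSE_ENGINE,  # DEEPSPARSE_ENGINE, ORT_ENGINE, or another supported backend
    device="cpu",              # "cpu" or a CUDA device for torch execution
    target_fps=None,           # optional override for the FPS value drawn on the annotations
)
annotate(args)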
def benchmark_yolo(args):
    model = _load_model(args)
    print("Loading dataset")
    dataset, _ = load_images(args.data_path, tuple(args.image_shape))
    total_iterations = args.num_iterations + args.num_warmup_iterations
    data_loader = _iter_batches(dataset, args.batch_size, total_iterations)

    print(
        (
            f"Running for {args.num_warmup_iterations} warmup iterations "
            f"and {args.num_iterations} benchmarking iterations"
        ),
        flush=True,
    )

    postprocessor = (
        YoloPostprocessor(args.image_shape)
        if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]
        else None
    )

    results = BenchmarkResults()
    progress_bar = tqdm(total=args.num_iterations)

    for iteration, batch in enumerate(data_loader):
        if args.device not in ["cpu", None]:
            # wait for pending GPU work so the timing below is accurate
            torch.cuda.synchronize()

        iter_start = time.time()

        # pre-processing
        batch = _preprocess_batch(args, batch)

        # inference
        outputs = _run_model(args, model, batch)

        # post-processing
        if postprocessor:
            outputs = postprocessor.pre_nms_postprocess(outputs)

        # NMS
        outputs = postprocess_nms(outputs)

        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()

        iter_end = time.time()

        # only record timings once the warmup iterations are complete
        if iteration >= args.num_warmup_iterations:
            results.append_batch(
                time_start=iter_start,
                time_end=iter_end,
                batch_size=args.batch_size,
            )
            progress_bar.update(1)

    progress_bar.close()

    print(f"Benchmarking complete. End-to-end results:\n{results}")
    print(
        f"End-to-end per image time: {results.ms_per_batch / args.batch_size}ms"
    )
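The final prints report per-batch and per-image latency. The small helper below is not part of the original script; it simply makes the relationship between the printed per-image time and the equivalent throughput explicit.

def summarize_latency(ms_per_batch: float, batch_size: int) -> str:
    # per-image latency, as printed by benchmark_yolo
    ms_per_image = ms_per_batch / batch_size
    # equivalent throughput in images per second
    images_per_sec = 1000.0 * batch_size / ms_per_batch
    return f"{ms_per_image:.4f} ms/image ({images_per_sec:.2f} images/sec)"

For example, a 16-image batch that averages 80 ms end to end works out to 5 ms per image, or 200 images per second.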