def detect(
    self, images: Union[str, numpy.ndarray, List[str], List[numpy.ndarray]]
) -> List[numpy.ndarray]:
    """
    Run the remote detection model on one image or a batch of images.

    :param images: list of or singular file paths or numpy arrays of images
        to run the detection model on. Number of images should equal model
        batch size
    :return: list of post-processed object detection results for each image
        including class label, likelihood, and bounding box coordinates
    """
    # Promote a single image to a one-element batch before loading
    if not isinstance(images, List):
        images = [images]
    # load_image handles both file paths and raw arrays; stack into one tensor
    images = numpy.stack([load_image(item) for item in images])
    print(
        f"Sending batch of {len(images)} images for detection to {self._url}"
    )

    start = time.time()
    # Serialize the stacked batch as a single model input
    payload = arrays_to_bytes([images])
    # POST the encoded batch to the inference server
    response = requests.post(self._url, data=payload)
    # Deserialize the server's post-processed detection outputs
    results = bytes_to_arrays(response.content)
    elapsed = time.time() - start
    print(f"Round-trip time took {elapsed * 1000.0:.4f}ms")

    return results
def predict():
    """Flask endpoint: run the detection pipeline on a posted image batch."""
    # Deserialize the request body into numpy input arrays
    inputs = bytes_to_arrays(flask.request.get_data())
    print(f"Received {len(inputs)} images from client")

    # Pre-processing: scale uint8 pixels into [0, 1] floats unless the
    # engine was built to accept quantized inputs directly
    tic = time.time()
    if not args.quantized_inputs:
        inputs = [inputs[0].astype(numpy.float32) / 255.0]
    print(f"Pre-processing time: {(time.time() - tic) * 1000.0:.4f}ms")

    # Inference
    print("Executing model")
    outputs, engine_seconds = engine.timed_run(inputs)
    print(f"Inference time: {engine_seconds * 1000.0:.4f}ms")

    # Post-processing that runs before non-maximum suppression
    tic = time.time()
    outputs = postprocessor.pre_nms_postprocess(outputs)
    print(f"Post-processing, pre-nms time: {(time.time() - tic) * 1000.0:.4f}ms")

    # Non-maximum suppression
    tic = time.time()
    outputs = postprocess_nms(outputs)
    print(f"nms time: {(time.time() - tic) * 1000.0:.4f}ms")

    return arrays_to_bytes(outputs)
def run():
    """Flask endpoint: deserialize inputs, run the engine, return outputs."""
    payload = flask.request.get_data()
    inputs = bytes_to_arrays(payload)
    _LOGGER.info(f"Received {len(inputs)} inputs from client")

    _LOGGER.info("Executing model")
    # timed_run returns the output tensors plus the wall-clock run time
    outputs, elapsed_time = engine.timed_run(inputs)
    _LOGGER.info(
        f"Inference time took {elapsed_time * 1000.0:.4f} milliseconds"
    )
    _LOGGER.info(f"Produced {len(outputs)} output tensors")

    return arrays_to_bytes(outputs)
def predict():
    """
    Expects data as bytes

    :returns: returns classification result as bytes
    """
    start = time.time()
    # Decode the posted payload into input arrays
    inputs = bytes_to_arrays(flask.request.get_data())
    print(f"Received {len(inputs)} images from client")

    # timed_run returns (outputs, elapsed); only the outputs are sent back
    run_result = predictor.timed_run(inputs)
    elapsed_ms = (time.time() - start) * 1000
    print(f"inference time: {elapsed_ms:.4f} ms")

    return arrays_to_bytes(run_result[0])
def classify(
    self,
    images: Union[str, numpy.ndarray, List[str], List[numpy.ndarray]],
) -> List[numpy.ndarray]:
    """
    Send a batch of images to the remote model server and return its
    post-processed classification outputs.

    :param images: list of or a single numpy array of images to run the
        model on. Number of images should equal model batch size
    :return: list of post-processed results for each image
    """
    # Promote a single image to a one-element batch
    if not isinstance(images, List):
        images = [images]
    print(f"Sending batch of {len(images)} images for detection to {self._url}")

    start = time.time()
    # Encode inputs for the wire
    payload = arrays_to_bytes(images)
    encode_done = time.time()

    # Remote inference round trip
    response = requests.post(self._url, data=payload)
    request_done = time.time()

    # Decode the server's outputs
    outputs = bytes_to_arrays(response.content)
    decode_done = time.time()

    print(
        f"Preprocessing time {(encode_done - start) * 1000:.4f}ms",
        f"Engine run time {(request_done - encode_done) * 1000:.4f}ms",
        f"Postprocessing time {(decode_done - request_done) * 1000:.4f}ms",
        f"Round-trip time {(time.time() - start) * 1000.0:.4f}ms",
        sep="\n",
    )

    return outputs