Code Example #1
    def detect(
        self, images: Union[str, numpy.ndarray, List[str], List[numpy.ndarray]]
    ) -> List[numpy.ndarray]:
        """
        :param images: list of or singular file paths or numpy arrays of images to
            run the detection model on. Number of images should equal model batch size
        :return: list of post-processed object detection results for each image
            including class label, likelihood, and bounding box coordinates
        """
        if not isinstance(images, List):
            # promote a single image to a batch of one
            images = [images]
        # load each image and stack them into a single batched array
        images = numpy.stack([load_image(image) for image in images])

        print(
            f"Sending batch of {len(images)} images for detection to {self._url}"
        )

        start = time.time()
        # Encode inputs
        data = arrays_to_bytes([images])
        # Send data to server for inference
        response = requests.post(self._url, data=data)
        # Decode outputs
        outputs = bytes_to_arrays(response.content)
        total_time = time.time() - start
        print(f"Round-trip time took {total_time * 1000.0:.4f}ms")

        return outputs
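The `load_image` helper used above is imported from the example's utilities and is not part of this excerpt. A minimal stand-in, assuming it accepts either a file path or an already-decoded array and returns an RGB numpy array, might look like the following (the `load_image_sketch` name and the PIL-based decoding are illustrative assumptions):

    from typing import Union

    import numpy
    from PIL import Image

    def load_image_sketch(source: Union[str, numpy.ndarray]) -> numpy.ndarray:
        # pass already-decoded arrays through unchanged
        if isinstance(source, numpy.ndarray):
            return source
        # otherwise treat the input as a file path and decode it to RGB
        return numpy.asarray(Image.open(source).convert("RGB"))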
Code Example #2
File: server.py  Project: kevinaer/sparseml
    def predict():
        # load raw images
        raw_data = flask.request.get_data()
        inputs = bytes_to_arrays(raw_data)
        print(f"Received {len(inputs)} images from client")

        # pre-processing
        preprocess_start_time = time.time()
        if not args.quantized_inputs:
            # model expects float inputs; scale uint8 pixels into [0, 1]
            inputs = [inputs[0].astype(numpy.float32) / 255.0]
        preprocess_time = time.time() - preprocess_start_time
        print(f"Pre-processing time: {preprocess_time * 1000.0:.4f}ms")

        # inference
        print("Executing model")
        outputs, elapsed_time = engine.timed_run(inputs)
        print(f"Inference time: {elapsed_time * 1000.0:.4f}ms")

        # post-processing
        postprocess_start_time = time.time()
        outputs = postprocessor.pre_nms_postprocess(outputs)
        postprocess_time = time.time() - postprocess_start_time
        print(f"Post-processing, pre-nms time: {postprocess_time * 1000.0:.4f}ms")

        # NMS
        nms_start_time = time.time()
        outputs = postprocess_nms(outputs)
        nms_time = time.time() - nms_start_time
        print(f"nms time: {nms_time * 1000.0:.4f}ms")

        return arrays_to_bytes(outputs)
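Handlers like `predict` above read from `flask.request`, but the route registration is not part of the excerpt. A minimal sketch of how such a handler is typically wired up; the route path, host, and port below are assumptions, not values taken from the project:

    import flask

    app = flask.Flask(__name__)

    @app.route("/predict", methods=["POST"])
    def predict():
        # echo the raw bytes back; a real handler would decode, run the
        # engine, and re-encode as in the examples above
        return flask.request.get_data()

    if __name__ == "__main__":
        app.run(host="0.0.0.0", port=5543)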
Code Example #3
File: server.py  Project: neuralmagic/deepsparse
    def run():
        data = flask.request.get_data()

        inputs = bytes_to_arrays(data)
        _LOGGER.info(f"Received {len(inputs)} inputs from client")

        _LOGGER.info("Executing model")
        outputs, elapsed_time = engine.timed_run(inputs)

        _LOGGER.info(f"Inference took {elapsed_time * 1000.0:.4f} milliseconds")
        _LOGGER.info(f"Produced {len(outputs)} output tensors")
        return arrays_to_bytes(outputs)
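All of these examples round-trip tensors through `arrays_to_bytes` and `bytes_to_arrays` from `deepsparse.utils`. Their actual implementation is not shown here; a minimal stand-in with the same round-trip behavior, built on numpy's own serializer, could look like this (the `_sketch` names mark it as an illustration, not the library's code):

    from io import BytesIO
    from typing import List

    import numpy

    def arrays_to_bytes_sketch(arrays: List[numpy.ndarray]) -> bytes:
        # savez stores each array with its dtype and shape in one payload
        buffer = BytesIO()
        numpy.savez(buffer, *arrays)
        return buffer.getvalue()

    def bytes_to_arrays_sketch(data: bytes) -> List[numpy.ndarray]:
        # npz entries are named arr_0, arr_1, ... in insertion order
        archive = numpy.load(BytesIO(data))
        return [archive[name] for name in archive.files]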
Code Example #4
    def predict():
        """
        Expects data as bytes
        :returns: returns classification result as bytes
        """
        start = time.time()
        raw_data = flask.request.get_data()
        inputs = bytes_to_arrays(raw_data)
        print(f"Received {len(inputs)} images from client")

        # timed_run returns (outputs, elapsed_seconds)
        result = predictor.timed_run(inputs)
        # measured from request receipt, so decode time is included as well
        inference_time = (time.time() - start) * 1000

        print(f"Inference time: {inference_time:.4f} ms")
        return arrays_to_bytes(result[0])
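The `engine` and `predictor` objects in these server handlers are constructed outside the excerpts. A minimal sketch of creating one with deepsparse; the model path and batch size below are placeholders and must match what the client sends:

    from deepsparse import compile_model

    # path and batch size are assumptions; batch size must equal the
    # number of images each client request carries
    engine = compile_model("/path/to/model.onnx", batch_size=1)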
Code Example #5
    def classify(
        self,
        images: Union[str, numpy.ndarray, List[str], List[numpy.ndarray]],
    ) -> List[numpy.ndarray]:
        """
        :param images: list of numpy arrays of images to
            run the detection model on. Number of images should equal model batch size
        :return: list of post-processed object detection results for each image
            including class label, likelihood, and bounding box coordinates
        """

        if not isinstance(images, List):
            images = [images]

        print(f"Sending batch of {len(images)} images for detection to {self._url}")

        start = time.time()
        # Encode inputs
        data = arrays_to_bytes(images)
        preprocessing_end = time.time()
        preprocessing_time = preprocessing_end - start

        # Send data to server for inference
        response = requests.post(self._url, data=data)
        execution_end_time = time.time()
        engine_execution_time = execution_end_time - preprocessing_end

        # Decode outputs
        outputs = bytes_to_arrays(response.content)
        postprocessing_time = time.time() - execution_end_time
        total_time = time.time() - start

        print(
            f"Preprocessing time {preprocessing_time * 1000:.4f}ms",
            f"Engine run time {engine_execution_time * 1000:.4f}ms",
            f"Postprocessing time {postprocessing_time * 1000:.4f}ms",
            f"Round-trip time  {total_time * 1000.0:.4f}ms",
            sep="\n",
        )

        return outputs
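Note that the "Engine run time" printed above is measured around the HTTP round trip, so it also includes network transfer and the server-side pre- and post-processing. For reference, a minimal standalone client matching the byte protocol of the servers above; the URL and input shape are assumptions, and one of the servers must already be running:

    import numpy
    import requests
    from deepsparse.utils import arrays_to_bytes, bytes_to_arrays

    url = "http://localhost:5543/predict"  # assumed address and route
    # assumed NCHW input shape; must match the compiled model
    batch = numpy.random.rand(1, 3, 224, 224).astype(numpy.float32)

    response = requests.post(url, data=arrays_to_bytes([batch]))
    outputs = bytes_to_arrays(response.content)
    print([output.shape for output in outputs])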