def framework_info() -> FrameworkInfo:
    """
    Detect the information for the deepsparse framework such as package versions,
    availability for core actions such as training and inference,
    sparsification support, and inference provider support.

    :return: The framework info for deepsparse
    :rtype: FrameworkInfo
    """
    # Check once and reuse the result both for the architecture probe and the
    # provider availability flag (the original called this twice).
    deepsparse_available = check_deepsparse_install(raise_on_error=False)

    arch = {}
    if deepsparse_available:
        from deepsparse.cpu import cpu_architecture

        arch = cpu_architecture()

    cpu_warnings = []
    if arch and arch.isa != "avx512" and arch.isa != "avx2":
        # Neither modern ISA detected: emit only the strongest warning instead
        # of stacking it on top of the AVX512-only warning (previously both
        # were appended for the same condition).
        cpu_warnings.append(
            "AVX2 and AVX512 instruction sets not detected, "
            "inference performance will be severely limited"
        )
    elif arch and arch.isa != "avx512":
        cpu_warnings.append(
            "AVX512 instruction set not detected, inference performance will be limited"
        )
    if arch and not arch.vnni:
        cpu_warnings.append(
            "VNNI instruction set not detected, "
            "quantized inference performance will be limited"
        )

    cpu_provider = FrameworkInferenceProviderInfo(
        name="cpu",
        description=(
            "Performant CPU provider within DeepSparse specializing in speedup of "
            "sparsified models using AVX and VNNI instruction sets"
        ),
        device="cpu",
        supported_sparsification=SparsificationInfo(),  # TODO: fill in when available
        available=deepsparse_available,
        properties={
            "cpu_architecture": arch,
        },
        warnings=cpu_warnings,
    )

    return FrameworkInfo(
        framework=Framework.deepsparse,
        package_versions={
            "deepsparse": get_version(package_name="deepsparse", raise_on_error=False),
            "sparsezoo": get_version(package_name="sparsezoo", raise_on_error=False),
            "sparseml": get_version(package_name="sparseml", raise_on_error=False),
        },
        sparsification=sparsification_info(),
        inference_providers=[cpu_provider],
        # deepsparse is an inference-only runtime: training, sparsification,
        # and ONNX export are handled by companion packages (sparseml).
        training_available=False,
        sparsification_available=False,
        exporting_onnx_available=False,
        inference_available=True,
    )
class ServerConfig(BaseModel):
    """
    A configuration for serving models in the DeepSparse inference server
    """

    # Models to serve; default_factory avoids declaring a shared mutable default.
    models: List[ServeModelConfig] = Field(
        default_factory=list,
        description=(
            "The models to serve in the server defined by the additional arguments"
        ),
    )
    # Fixed type: the computed default is an int (at least 1, half the physical
    # cores), so the field is declared as int rather than str. The description
    # is corrected to match the actual default.
    workers: int = Field(
        default=max(1, cpu_architecture().num_available_physical_cores // 2),
        description=(
            "The number of maximum workers to use for processing pipeline requests. "
            "Defaults to half the number of physical cores on the device."
        ),
    )
onnxruntime = None
# Preserve the original import failure (set in the guarded import above this
# chunk) so it can be surfaced lazily when the ORT engine is actually used.
ort_import_error = ort_import_err

try:
    # flake8: noqa
    from deepsparse.cpu import cpu_architecture
except ImportError:
    raise ImportError(
        "Unable to import deepsparse python apis. "
        "Please contact [email protected]"
    )

__all__ = ["ORTEngine"]

# Cached CPU architecture info; NUM_CORES is the physical (not logical) count.
ARCH = cpu_architecture()
NUM_CORES = ARCH.num_available_physical_cores


def _validate_ort_import():
    """
    Raise an informative ImportError if onnxruntime failed to import earlier.

    :raises ImportError: if the deferred onnxruntime import error is set
    """
    if ort_import_error is not None:
        raise ImportError(
            # typo fixed: "onxxruntime" -> "onnxruntime"
            "An exception occurred when importing onnxruntime. Please verify that "
            "onnxruntime is installed in order to use the onnxruntime inference "
            f"engine. \n\nException info: {ort_import_error}"
        )


def _validate_batch_size(batch_size: int) -> int:
    """
    Validate that the given batch size is positive.

    :param batch_size: the batch size to validate
    :return: the validated batch size
    :raises ValueError: if batch_size is less than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be greater than 0")
    # Bug fix: the function is annotated -> int but previously fell off the
    # end and returned None on the valid path.
    return batch_size