Example #1
0
def framework_info() -> FrameworkInfo:
    """
    Detect the information for the deepsparse framework such as package versions,
    availability for core actions such as training and inference,
    sparsification support, and inference provider support.

    :return: The framework info for deepsparse
    :rtype: FrameworkInfo
    """
    # probe the install exactly once; the original called
    # check_deepsparse_install twice (arch probe + provider availability)
    deepsparse_available = check_deepsparse_install(raise_on_error=False)

    arch = {}
    if deepsparse_available:
        from deepsparse.cpu import cpu_architecture

        arch = cpu_architecture()

    cpu_warnings = []
    if arch:
        # emit only the most specific ISA warning: previously a CPU with
        # neither AVX2 nor AVX512 received both the AVX512-only warning and
        # the AVX2+AVX512 warning
        if arch.isa != "avx512" and arch.isa != "avx2":
            cpu_warnings.append(
                "AVX2 and AVX512 instruction sets not detected, "
                "inference performance will be severely limited"
            )
        elif arch.isa != "avx512":
            cpu_warnings.append(
                "AVX512 instruction set not detected, inference performance will be limited"
            )
        if not arch.vnni:
            cpu_warnings.append(
                "VNNI instruction set not detected, "
                "quantized inference performance will be limited"
            )

    cpu_provider = FrameworkInferenceProviderInfo(
        name="cpu",
        description=(
            "Performant CPU provider within DeepSparse specializing in speedup of "
            "sparsified models using AVX and VNNI instruction sets"
        ),
        device="cpu",
        supported_sparsification=SparsificationInfo(),  # TODO: fill in when available
        available=deepsparse_available,
        properties={
            "cpu_architecture": arch,
        },
        warnings=cpu_warnings,
    )

    return FrameworkInfo(
        framework=Framework.deepsparse,
        package_versions={
            "deepsparse": get_version(package_name="deepsparse", raise_on_error=False),
            "sparsezoo": get_version(package_name="sparsezoo", raise_on_error=False),
            "sparseml": get_version(package_name="sparseml", raise_on_error=False),
        },
        sparsification=sparsification_info(),
        inference_providers=[cpu_provider],
        training_available=False,
        sparsification_available=False,
        exporting_onnx_available=False,
        inference_available=True,
    )
Example #2
0
class ServerConfig(BaseModel):
    """
    A configuration for serving models in the DeepSparse inference server
    """

    # models to expose through the server; pydantic deep-copies the default,
    # so the mutable [] default is safe here
    models: List[ServeModelConfig] = Field(
        default=[],
        description=(
            "The models to serve in the server defined by the additional arguments"
        ),
    )
    # fixed: annotation was `str` although the default (and the semantics,
    # a worker count) is an int — pydantic was coercing the value
    workers: int = Field(
        default=max(1, cpu_architecture().num_available_physical_cores // 2),
        description=(
            "The number of maximum workers to use for processing pipeline requests. "
            "Defaults to the number of physical cores on the device."
        ),
    )
Example #3
0
    onnxruntime = None
    ort_import_error = ort_import_err


try:
    # flake8: noqa
    from deepsparse.cpu import cpu_architecture
except ImportError:
    raise ImportError(
        "Unable to import deepsparse python apis. "
        "Please contact [email protected]"
    )

__all__ = ["ORTEngine"]

# CPU topology probed once at import time; NUM_CORES is the number of
# physical cores available on this machine
ARCH = cpu_architecture()
NUM_CORES = ARCH.num_available_physical_cores


def _validate_ort_import():
    """
    Raise a descriptive error if the onnxruntime import recorded a failure.

    :raises ImportError: if onnxruntime could not be imported earlier,
        with the original import exception appended for context
    """
    if ort_import_error is not None:
        # fixed typo in the user-facing message: "onxxruntime" -> "onnxruntime"
        raise ImportError(
            "An exception occurred when importing onnxruntime. Please verify that "
            "onnxruntime is installed in order to use the onnxruntime inference "
            f"engine. \n\nException info: {ort_import_error}"
        )


def _validate_batch_size(batch_size: int) -> int:
    if batch_size < 1:
        raise ValueError("batch_size must be greater than 0")