Example #1
def convert_method_to_trt_engine(module: torch.jit.ScriptModule,
                                 method_name: str, compile_spec: Any) -> bytes:
    """Convert a TorchScript module method to a serialized TensorRT engine

    Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings

    Args:
        module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch
            ``torch.nn.Module``
        method_name (str): Name of method to convert
        compile_spec (dict): Compilation settings including operating precision, target device, etc.
            One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs
            to the graph. All other keys are optional

            .. code-block:: py

                CompileSpec = {
                    "input_shapes": [
                        (1, 3, 224, 224), # Static input shape for input #1
                        {
                            "min": (1, 3, 224, 224),
                            "opt": (1, 3, 512, 512),
                            "max": (1, 3, 1024, 1024)
                        } # Dynamic input shape for input #2
                    ],
                    "device": {
                        "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA)
                        "gpu_id": 0, # Target gpu id to run engine (Use Xavier as gpu id for DLA)
                        "dla_core": 0, # (DLA only) Target dla core id to run engine
                        "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU
                    },
                    "op_precision": torch.half, # Operating precision set to FP16
                    "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
                    "refit": false, # enable refit
                    "debug": false, # enable debuggable engine
                    "strict_types": false, # kernels should strictly run in operating precision
                    "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels
                    "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels
                    "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels
                    "workspace_size": 0, # Maximum size of workspace given to TensorRT
                    "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set)
                }

            Input sizes can be specified as torch sizes, tuples, or lists. Op precisions can be specified
            using torch datatypes or trtorch datatypes, and you can use either torch devices or the trtorch
            device type enum to select the device type.

    Returns:
        bytes: Serialized TensorRT engine, which can either be saved to a file or deserialized via TensorRT APIs
    """
    if isinstance(module, torch.jit.ScriptFunction):
        raise TypeError(
            "torch.jit.ScriptFunctions currently are not directly supported, wrap the function in a module to compile"
        )

    return trtorch._C.convert_graph_to_trt_engine(
        module._c, method_name, _parse_compile_spec(compile_spec))
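
A minimal usage sketch (hypothetical model and file name; assumes the trtorch package and a CUDA-capable GPU are available). It scripts a small module, converts its forward method to a serialized engine, and writes the engine to disk:

import torch
import trtorch

# Hypothetical model: any scriptable torch.nn.Module would do here
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3, padding=1),
    torch.nn.ReLU(),
).eval().cuda()
scripted = torch.jit.script(model)

engine = trtorch.convert_method_to_trt_engine(
    scripted,
    "forward",
    {
        "input_shapes": [(1, 3, 224, 224)],  # one static input
        "op_precision": torch.half,          # build an FP16 engine
    },
)

# Persist the serialized engine for later use with the TensorRT runtime
# (binary mode if the engine comes back as bytes, text mode otherwise)
mode = "wb" if isinstance(engine, bytes) else "w"
with open("model.engine", mode) as f:
    f.write(engine)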
Example #2
def compile(module: torch.jit.ScriptModule,
            compile_spec: Any) -> torch.jit.ScriptModule:
    """Compile a TorchScript module for NVIDIA GPUs using TensorRT

    Takes an existing TorchScript module and a set of settings to configure the compiler,
    and converts methods to JIT graphs which call equivalent TensorRT engines

    Specifically converts the forward method of a TorchScript Module

    Args:
        module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch
            ``torch.nn.Module``
        compile_spec (dict): Compilation settings including operating precision, target device, etc.
            One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs
            to the graph. All other keys are optional

            .. code-block:: py

                compile_spec = {
                    "input_shapes": [
                        (1, 3, 224, 224), # Static input shape for input #1
                        {
                            "min": (1, 3, 224, 224),
                            "opt": (1, 3, 512, 512),
                            "max": (1, 3, 1024, 1024)
                        } # Dynamic input shape for input #2
                    ],
                    "op_precision": torch.half, # Operating precision set to FP16
                    "refit": false, # enable refit
                    "debug": false, # enable debuggable engine
                    "strict_types": false, # kernels should strictly run in operating precision
                    "allow_gpu_fallback": true, # (DLA only) Allow layers unsupported on DLA to run on GPU
                    "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA)
                    "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels
                    "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels
                    "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels
                    "workspace_size": 0, # Maximum size of workspace given to TensorRT
                    "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set)
                }

            Input sizes can be specified as torch sizes, tuples, or lists. Op precisions can be specified
            using torch datatypes or trtorch datatypes, and you can use either torch devices or the trtorch
            device type enum to select the device type.

    Returns:
        torch.jit.ScriptModule: Compiled TorchScript Module, when run it will execute via TensorRT
    """

    if isinstance(module, torch.jit.ScriptFunction):
        raise TypeError(
            "torch.jit.ScriptFunction currently is not directly supported, wrap the function in a module to compile"
        )

    compiled_cpp_mod = trtorch._C.compile_graph(
        module._c, _parse_compile_spec(compile_spec))
    compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod)
    return compiled_module
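
A minimal sketch of end-to-end use (hypothetical model; assumes trtorch is installed and a CUDA GPU is present). The compiled module behaves like any other TorchScript module, so it can be called directly and saved with torch.jit.save:

import torch
import trtorch

# Hypothetical model: stands in for any scriptable torch.nn.Module
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, 3, padding=1),
    torch.nn.ReLU(),
).eval().cuda()
scripted = torch.jit.script(model)

trt_module = trtorch.compile(scripted, {
    "input_shapes": [(1, 3, 224, 224)],
    "op_precision": torch.half,  # run in FP16
})

# Inputs should match the precision the engine was built for
x = torch.randn(1, 3, 224, 224).half().cuda()
out = trt_module(x)

# The result is an ordinary TorchScript module and serializes as one
torch.jit.save(trt_module, "trt_model.ts")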