Example #1
    def __init__(self, graph, opset=None, optimize=None, fold_constant=None):
        """
        Converts a TensorFlow model into ONNX.

        Args:
            graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]):
                    A tuple containing a TensorFlow graph and output names or a callable that returns one.


            opset (int): The ONNX opset to use during conversion.
            optimize (bool): Whether to use tf2onnx's graph optimization pass.
            fold_constant (bool):
                    Whether to fold constants in the TensorFlow Graph.
                    Requires that ``optimize`` is also enabled.
                    Defaults to True.
        """
        self._graph = graph
        self.opset = util.default(opset, 11)
        self.fold_constant = util.default(fold_constant, True)
        self.optimize = util.default(optimize, True)

        if self.fold_constant and not self.optimize:
            G_LOGGER.warning(
                "`fold_constant` is enabled, but `optimize` is disabled. Constant folding will not be performed"
            )
Example #2
    def __init__(
        self,
        graph,
        max_workspace_size=None,
        fp16=None,
        int8=None,
        max_batch_size=None,
        is_dynamic_op=False,
        minimum_segment_size=None,
    ):
        """
        Optimizes a TensorFlow model using TF-TRT.

        Args:
            graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                    A callable that can supply a tuple containing a TensorFlow graph and output names.
            max_workspace_size (int): The maximum workspace size.
            fp16 (bool): Whether to run in FP16 mode.
            int8 (bool): Whether to run in INT8 mode.
            max_batch_size (int): The maximum batch size.
            is_dynamic_op (bool): Whether to build TensorRT engines at runtime rather than during conversion. Defaults to False.
            minimum_segment_size (int): The minimum number of nodes required for a subgraph to be converted to a TensorRT engine. Defaults to 3.
        """
        self._graph = graph
        self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
        self.fp16 = util.default(fp16, False)
        self.int8 = util.default(int8, False)
        self.max_batch_size = util.default(max_batch_size, 1)
        self.is_dynamic_op = is_dynamic_op
        self.minimum_segment_size = util.default(minimum_segment_size, 3)
Example #3
    def __init__(self,
                 model,
                 error_ok=None,
                 external_data_dir=None,
                 save_to_disk_threshold_bytes=None):
        """
        Run shape inference on an ONNX model.

        Args:
            model (Union[onnx.ModelProto, str, Callable() -> Union[onnx.ModelProto, str]]):
                    An ONNX model, a path to a model, or a callable that returns either.
                    Supports models larger than the 2 GiB protobuf limit.

            error_ok (bool):
                    Whether errors during shape inference should be suppressed. Defaults to True.
            external_data_dir (str):
                    The directory where external data for the model is stored.
                    Only used if the model is provided via a path rather than a loader.
            save_to_disk_threshold_bytes (int):
                    The size in bytes above which a ModelProto will be serialized to the disk
                    before running shape inference.
                    This can be used to work around the 2 GiB protobuf limitation.
                    Defaults to ~2 GiB.
        """
        self._model = model
        self.error_ok = util.default(error_ok, True)
        self.external_data_dir = external_data_dir
        # Subtract a little so we're below the real threshold
        self.save_to_disk_threshold_bytes = util.default(
            save_to_disk_threshold_bytes, (2 << 30) - 8192)
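For reference, the default threshold above sits just below the 2 GiB protobuf limit; the arithmetic is:

    >>> 2 << 30            # 2 GiB expressed in bytes
    2147483648
    >>> (2 << 30) - 8192   # the default save_to_disk_threshold_bytes
    2147475456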
Example #4
 def __init__(self, explicit_precision, explicit_batch=None):
     """
     Args:
         explicit_precision (bool): Whether to create the network with explicit precision enabled.
         explicit_batch (bool): Whether to create the network with explicit batch mode. Defaults to True.
     """
     self.explicit_precision = util.default(explicit_precision, False)
     self.explicit_batch = util.default(explicit_batch, True)
Example #5
    def __init__(
        self,
        model,
        num_passes=None,
        do_shape_inference=None,
        partitioning=None,
        fold_shapes=None,
        copy=None,
        error_ok=None,
    ):
        """
        Fold constants in an ONNX model.

        Args:
            model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]):
                    An ONNX model or a callable that returns one.

            num_passes (int):
                    The number of constant folding passes to run.
                    Sometimes, subgraphs that compute tensor shapes may not be foldable in a single pass.
                    By default, Polygraphy will automatically determine the number of passes required.
            do_shape_inference (bool):
                    Whether to run shape inference in the model between passes.
                    This enables the loader to fold `Shape` nodes.
                    Only effective if `fold_shapes` is True.
                    Defaults to True.
            partitioning (Union[str, None]):
                    Whether/How to partition the graph so that errors in folding one
                    part of a model do not affect other parts. Available modes are:

                    - None: Do not partition the graph. If inference fails, no constants are folded.
                    - 'basic': Partition the graph. If inference fails in one partition, other partitions will remain unaffected.
                    - 'recursive': Partition the graph recursively. If inference fails in a partition, the partition will be further partitioned.

                    Defaults to None.
            fold_shapes (bool):
                    Whether to fold `Shape` nodes in the graph.
                    This requires shapes to be inferred in the graph, and can only fold
                    static shapes.
                    Defaults to True.
            copy (bool):
                    Whether to create a copy of the model first.
                    Defaults to False.
            error_ok (bool):
                    Whether to suppress errors during constant folding.
                    If this is set to `False`, errors will be re-raised.
                    Defaults to True.
        """
        super().__init__(model, copy)
        self.num_passes = num_passes
        self.do_shape_inference = util.default(do_shape_inference, True)
        self.partitioning = partitioning
        self.fold_shapes = util.default(fold_shapes, True)
        self.error_ok = util.default(error_ok, True)
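A rough usage sketch for this loader. The class and module names below (`FoldConstants`, `OnnxFromPath`, `polygraphy.backend.onnx`) are assumptions based on Polygraphy's loader style and are not shown in the snippet itself:

    # Usage sketch only; names are assumed, and loaders are invoked by calling them.
    from polygraphy.backend.onnx import OnnxFromPath, FoldConstants

    fold = FoldConstants(OnnxFromPath("model.onnx"), fold_shapes=True)
    folded_model = fold()  # returns the onnx.ModelProto with constants folded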
Example #6
 def __init__(self, shape=None, dtype=None):
     """
     Args:
         shape (Tuple[int]): The initial shape of the buffer.
         dtype (numpy.dtype): The data type of the buffer.
     """
     super().__init__(ptr=0,
                      shape=util.default(shape, tuple()),
                      dtype=util.default(dtype, np.float32))
     self.allocated_nbytes = 0
     self.resize(self.shape)
Example #7
    def __init__(
        self,
        network_loader=None,
        max_workspace_size=None,
        max_batch_size=None,
        fp16=None,
        tf32=None,
        load_engine=None,
        save_engine=None,
        layerwise=False,
        plugins=[],
        name=None,
    ):
        """
        Creates a runner that manages a single TensorRT engine.

        Args:
            network_loader (BaseModelLoader):
                    A loader that returns a TRT builder, network, parser and input shapes.
            max_workspace_size (int): The maximum workspace size.
            max_batch_size (int): The maximum batch size.
            fp16 (bool): Whether to run in FP16 mode.
            tf32 (bool): Whether to run in TF32 mode.
            layerwise (bool): Whether to retrieve the outputs of every layer in the network.
            plugins (List[str]): A list of paths to plugin libraries to load before inference.
            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        G_LOGGER.warning(
            "TrtLegacyRunner is deprecated, and will be removed in a future release"
        )
        # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization.
        if plugins:
            import ctypes

            for plugin in plugins:
                path = os.path.abspath(plugin)
                G_LOGGER.info("Loading plugin library: {:}".format(path))
                ctypes.CDLL(path)

        # Choose a unique name for this runner.
        super().__init__(name=name, prefix="trt-legacy-runner")

        # Save parameters for activate and deactivate.
        self.network_loader = network_loader
        self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
        self.fp16 = util.default(fp16, False)
        self.tf32 = util.default(tf32, False)
        self.load_engine = load_engine

        self.engine_path = save_engine

        self.layerwise = layerwise
        self.max_batch_size = max_batch_size
Example #8
    def __init__(self, explicit_precision=None, explicit_batch=None):
        """
        Creates an empty TensorRT network.

        Args:
            explicit_precision (bool):
                    Whether to create the network with explicit precision enabled. Defaults to False.
            explicit_batch (bool):
                    Whether to create the network with explicit batch mode. Defaults to True.
        """
        self.explicit_precision = util.default(explicit_precision, False)
        self.explicit_batch = util.default(explicit_batch, True)
Example #9
 def default_tuple(tup, default):
     """
     Returns ``default`` if ``tup`` is not a tuple or list.
     Otherwise, returns a tuple in which any ``None`` elements of ``tup`` are
     replaced by the corresponding elements of ``default``.
     """
     if tup is None or (not isinstance(tup, tuple) and not isinstance(tup, list)):
         return default
     new_tup = []
     for elem, default_elem in zip(tup, default):
         new_tup.append(util.default(elem, default_elem))
     return tuple(new_tup)
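A quick illustration of `default_tuple`, assuming `util.default` behaves as sketched earlier (the fallback is used only when an element is None):

    default_tuple(None, (1, 2, 3))      # -> (1, 2, 3): not a tuple/list, so the default is returned as-is
    default_tuple("abc", (1, 2, 3))     # -> (1, 2, 3): a string is not a tuple/list either
    default_tuple((None, 5), (1, 2))    # -> (1, 5): None elements are filled element-wise from the default
    default_tuple((7, 8, 9), (1, 2))    # -> (7, 8): zip() stops at the shorter of the two sequences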
Example #10
        def determine_model_type():
            if args_util.get(args, "model_type") is not None:
                return args.model_type.lower()

            if args_util.get(args, "model_file") is None:
                return None

            def use_ext(ext_mapping):
                file_ext = os.path.splitext(args.model_file)[-1]
                if file_ext in ext_mapping:
                    return ext_mapping[file_ext]

            runners = util.default(args_util.get(args, "runners"), [])
            if args_util.get(args, "ckpt") or os.path.isdir(args.model_file):
                return "ckpt"
            elif "tf" in runners or "trt_legacy" in runners:
                if args.caffe_model:
                    return "caffe"
                return use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING) or "frozen"
            else:
                model_type = use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING)
                if model_type:
                    return model_type

            G_LOGGER.critical(
                "Could not automatically determine model type for: {:}\n"
                "Please explicitly specify the type with the --model-type option".format(args.model_file)
            )
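The extension lookup above only needs a plain dictionary from file extension to model type. An illustrative mapping (not the actual contents of `ModelArgs.EXT_MODEL_TYPE_MAPPING`):

    import os

    # Illustrative only -- the real mapping lives in ModelArgs.EXT_MODEL_TYPE_MAPPING.
    ext_mapping = {".onnx": "onnx", ".pb": "frozen", ".engine": "engine"}

    file_ext = os.path.splitext("model.onnx")[-1]   # ".onnx"
    model_type = ext_mapping.get(file_ext)          # "onnx"; None when the extension is unknown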
Example #11
def parse_dict_with_default(arg_lst, cast_to=None, sep=None):
    """
    Generate a dictionary from a list of arguments of the form:
    ``<key>:<val>``. If ``<key>`` is empty, the value will be assigned
    to an empty string key in the returned mapping.

    Args:
        arg_lst (List[str]):
                The arguments to map.

        cast_to (type):
                The type to cast the values in the map to. By default,
                uses the type returned by ``cast``.
        sep (str):
                The separator between the key and value strings.
    Returns:
        Dict[str, obj]: The mapping.
    """
    sep = util.default(sep, ":")

    if arg_lst is None:
        return

    arg_map = {}
    for arg in arg_lst:
        key, _, val = arg.rpartition(sep)
        val = cast(val)
        if cast_to:
            val = cast_to(val)
        arg_map[key] = val
    return arg_map
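A rough usage illustration. The exact value types depend on what `cast` returns (its definition is not shown here), so treat the outputs below as approximate:

    parse_dict_with_default(["layer1:0.5", ":0.1"])
    # -> {"layer1": 0.5, "": 0.1}   (an empty key maps to the empty-string key, per the docstring)

    parse_dict_with_default(["a:1", "b:2"], cast_to=int)
    # -> {"a": 1, "b": 2}

    parse_dict_with_default(None)
    # -> None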
Example #12
    def add_loader(self, loader_str, loader_id, suffix=None):
        """
        Adds a loader to the script.
        If the loader is a duplicate, returns the existing loader instead.

        Args:
            loader_str (str):
                    A string constructing the loader.
                    For security reasons, this must be generated using
                    `make_invocable` or `Script.invoke_if_non_default`.
            loader_id (str): A short human-friendly identifier for the loader.
            suffix (str): A string to append to the loader's name. Defaults to an empty string.

        Returns:
            str: The name of the loader added.
        """
        suffix = util.default(suffix, "")
        loader_str = ensure_safe(loader_str).unwrap()

        if loader_str in self.loaders:
            return self.loaders[loader_str]

        unique_name = loader_id + suffix
        if self.loader_count[unique_name]:
            unique_name = "{:}_{:}".format(unique_name,
                                           self.loader_count[loader_id])
        unique_name = Script.String(unique_name, safe=True, inline=True)

        self.loader_count[loader_id] += 1
        self.loaders[loader_str] = unique_name
        return unique_name
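The bookkeeping above deduplicates identical loader strings and numbers distinct loaders that share an identifier. A rough illustration, where `script` is an instance of the surrounding class, `"SomeLoader"` is a hypothetical loader name, and `make_invocable` is assumed to produce identical strings for identical arguments:

    name_a = script.add_loader(make_invocable("SomeLoader", "a.onnx"), "load_model")
    name_b = script.add_loader(make_invocable("SomeLoader", "a.onnx"), "load_model")
    name_c = script.add_loader(make_invocable("SomeLoader", "b.onnx"), "load_model")

    assert name_a == name_b   # identical loader string -> the existing name is returned
    assert name_c != name_a   # new loader string, same id -> a numbered name such as "load_model_1"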
Example #13
    def __init__(self, network, config=None, save_timing_cache=None):
        """
        Builds and serializes a TensorRT engine.

        Args:
            network (Callable() -> Tuple[trt.Builder, trt.INetworkDefinition]):
                    A callable capable of returning a TensorRT Builder and INetworkDefinition. The returned builder
                    and network are owned by EngineFromNetwork and should not be freed manually. The callable may
                    have at most 3 return values if another object needs to be kept alive for the duration of the network,
                    e.g., in the case of a parser. EngineFromNetwork will take ownership of the third return value, and,
                    like the network, it should not be freed by the callable. The first and second return values must
                    always be the builder and network respectively.
                    If instead of a loader, the network, builder, and optional parser arguments are provided directly,
                    then EngineFromNetwork will *not* deallocate them.


            config (Callable(trt.Builder, trt.INetworkDefinition) -> trt.IBuilderConfig):
                    A callable that returns a TensorRT builder configuration. If not supplied,
                    a `CreateConfig` instance with default parameters is used.
            save_timing_cache (Union[str, file-like]):
                    A path or file-like object at which to save a tactic timing cache.
                    Any existing cache will be overwritten. Note that if the provided config includes a tactic
                    timing cache, the data from that cache will be copied into the new cache.
        """
        self._network = network
        self._config = util.default(config, CreateConfig())
        self.timing_cache_path = save_timing_cache
Example #14
    def __init__(self,
                 model,
                 input_metadata=None,
                 output_metadata=None,
                 check_meta=None):
        """
        Extracts a subgraph from an ONNX model.

        Args:
            model (Union[Union[onnx.ModelProto, onnx_graphsurgeon.Graph], Callable() -> Union[onnx.ModelProto, onnx_graphsurgeon.Graph]]):
                    An ONNX model or ONNX-GraphSurgeon Graph or a callable that returns one.

            input_metadata (TensorMetadata):
                    Metadata for the inputs of the subgraph.
                    Name, shape, and data type are required.
                    If not provided, the graph inputs are not modified.
            output_metadata (TensorMetadata):
                    Metadata for the outputs of the subgraph.
                    Name and data type are required.
                    If not provided, the graph outputs are not modified.
            check_meta (bool):
                    Whether to check that the provided input and output metadata include
                    all the expected fields.
                    Defaults to True.
        """
        self._model = model
        self.input_metadata = input_metadata
        self.output_metadata = output_metadata
        self.check_meta = util.default(check_meta, True)
Example #15
    def fill_defaults(self, network, default_shape_value=None):
        """
        Fill this profile with sane default values for any bindings whose
        shapes have not been set explicitly.

        Args:
            network (trt.INetworkDefinition):
                    The TensorRT network this profile is meant for.
                    This will be used to determine model inputs and their shapes.
            default_shape_value (int):
                    The value to use to override dynamic dimensions.

        Returns:
            Profile: Self
        """
        default_shape_value = util.default(default_shape_value,
                                           constants.DEFAULT_SHAPE_VALUE)

        for idx in range(network.num_inputs):
            inp = network.get_input(idx)

            if inp.name in self:
                continue

            with G_LOGGER.verbosity(
                    G_LOGGER.CRITICAL):  # WAR for spam from TRT
                is_shape_tensor = inp.is_shape_tensor
            if is_shape_tensor:
                rank = inp.shape[0]
                shape = (default_shape_value, ) * rank
                G_LOGGER.warning(
                    "{:} | No values provided; Will use input values: {:} for min/opt/max in profile.\n"
                    .format(trt_util.str_from_tensor(inp, is_shape_tensor),
                            shape),
                    mode=LogMode.ONCE,
                )
                G_LOGGER.warning(
                    "This will cause the shape-tensor to have static values. If this is incorrect, please "
                    "set the range of values for this input shape-tensor.",
                    mode=LogMode.ONCE,
                )
            else:
                shape = util.override_dynamic_shape(inp.shape,
                                                    default_shape_value)
                if shape != inp.shape:
                    G_LOGGER.warning(
                        "{:} | No shapes provided; Will use shape: {:} for min/opt/max in profile.\n"
                        .format(trt_util.str_from_tensor(inp, is_shape_tensor),
                                shape),
                        mode=LogMode.ONCE,
                    )
                    G_LOGGER.warning(
                        "This will cause the tensor to have a static shape. If this is incorrect, please "
                        "set the range of shapes for this input tensor.",
                        mode=LogMode.ONCE,
                    )

            self.add(inp.name, shape, shape, shape)
        return self
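`util.override_dynamic_shape` is not shown in this listing; the behavior `fill_defaults` relies on is presumably replacing dynamic dimensions with the default value, roughly:

    # Sketch of the assumed behavior; the real util.override_dynamic_shape may differ.
    def override_dynamic_shape(shape, default_shape_value=1):
        return [default_shape_value if dim is None or dim < 0 else dim for dim in shape]

    override_dynamic_shape([-1, 3, 224, 224])   # -> [1, 3, 224, 224]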
Example #16
    def __init__(self,
                 gpu_memory_fraction=None,
                 allow_growth=None,
                 use_xla=None):
        """
        Creates a TensorFlow config.

        Args:
            gpu_memory_fraction (float):
                The fraction of GPU memory that will be made available to TensorFlow.
                This should be a value between 0.0 and 1.0.
            allow_growth (bool): Whether to allow GPU memory allocated by TensorFlow to grow.
            use_xla (bool): Whether to attempt to enable XLA.
        """
        self.gpu_memory_fraction = util.default(gpu_memory_fraction, 0.9)
        self.allow_growth = util.default(allow_growth, False)
        self.use_xla = util.default(use_xla, False)
Example #17
    def __init__(self, deploy, model, outputs, batch_size=None, dtype=None):
        self.deploy = deploy
        self.model = model
        if not self.model:
            G_LOGGER.warning(
                "No model file provided for Caffe model, random weights will be used. To avoid this, "
                "please set the model paramater, or --model")

        if not outputs:
            G_LOGGER.critical(
                "Please set Caffe model outputs using the outputs parameter, or --trt-outputs. "
                "Note: To determine possible outputs, try running: tail -n50 {:}"
                .format(deploy))

        self.outputs = outputs
        self.dtype = util.default(dtype, trt.float32)
        self.batch_size = util.default(batch_size, 1)
Example #18
    def parse(self, args):
        self.verbosity_count = args_util.get(args, "verbose") - args_util.get(
            args, "quiet")
        self.silent = args_util.get(args, "silent")
        self.log_format = util.default(args_util.get(args, "log_format"), [])
        self.log_file = args_util.get(args, "log_file")

        # Enable logger settings immediately on parsing.
        self.get_logger()
Example #19
    def __init__(self, model, copy=None):
        """
        Args:
            model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model.

            copy (bool): Whether to create a copy of the model first. Defaults to False.
        """
        self._model = model
        self.copy = util.default(copy, False)
Example #20
    def parse(self, args):
        def determine_model_type():
            if args_util.get(args, "model_type") is not None:
                return args.model_type.lower()

            if args_util.get(args, "model_file") is None:
                return None

            def use_ext(ext_mapping):
                file_ext = os.path.splitext(args.model_file)[-1]
                if file_ext in ext_mapping:
                    return ext_mapping[file_ext]

            runners = util.default(args_util.get(args, "runners"), [])
            if args_util.get(args, "ckpt") or os.path.isdir(args.model_file):
                return "ckpt"
            elif "tf" in runners or "trt_legacy" in runners:
                if args.caffe_model:
                    return "caffe"
                return use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING) or "frozen"
            else:
                model_type = use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING)
                if model_type:
                    return model_type

            G_LOGGER.exit(
                "Could not automatically determine model type for: {:}\n"
                "Please explicitly specify the type with the --model-type option"
                .format(args.model_file))

        if args_util.get(args, "input_shapes"):
            self.input_shapes = args_util.parse_meta(
                args_util.get(args, "input_shapes"),
                includes_dtype=False)  # TensorMetadata
        else:
            self.input_shapes = TensorMetadata()

        self.model_file = args_util.get(args, "model_file")

        if self.model_file:
            G_LOGGER.verbose("Model: {:}".format(self.model_file))
            if not os.path.exists(self.model_file):
                G_LOGGER.warning("Model path does not exist: {:}".format(
                    self.model_file))
            self.model_file = os.path.abspath(self.model_file)

        model_type_str = util.default(self._model_type, determine_model_type())
        self.model_type = ModelArgs.ModelType(
            model_type_str) if model_type_str else None

        if self.model_type == "trt-network-script" and (
                not self.model_file or not self.model_file.endswith(".py")):
            G_LOGGER.exit(
                "TensorRT network scripts must exist and have '.py' extensions. "
                "Note: Provided network script path was: {:}".format(
                    self.model_file))
Example #21
def wrapper():
    """
    Returns the global Polygraphy CUDA wrapper.

    Returns:
        Cuda: The global CUDA wrapper.
    """
    global G_CUDA
    G_CUDA = util.default(G_CUDA, Cuda())
    return G_CUDA
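The one-liner above is a compact lazy-initialization idiom, but note that `Cuda()` is evaluated as an argument on every call, even once `G_CUDA` is already set (assuming `util.default` behaves as sketched earlier). A longhand version that only constructs the wrapper once would be:

    G_CUDA = None

    def wrapper():
        global G_CUDA
        if G_CUDA is None:
            G_CUDA = Cuda()   # constructed only on the first call, then reused
        return G_CUDA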
Example #22
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The model, after saving it.
        """
        model, _ = util.invoke_if_callable(self._model)
        G_LOGGER.info("Saving ONNX model to: {:}".format(self.path))
        if self.external_data_path is not None:
            G_LOGGER.verbose(
                "Saving external data for ONNX model to: {:}".format(
                    self.external_data_path))
            try:
                external_data_helper.convert_model_to_external_data(
                    model,
                    location=self.external_data_path,
                    all_tensors_to_one_file=util.default(
                        self.all_tensors_to_one_file, True),
                    size_threshold=util.default(self.size_threshold, 1024),
                )
            except TypeError:
                if self.size_threshold is not None:
                    G_LOGGER.warning(
                        "This version of onnx does not support size_threshold in convert_model_to_external_data"
                    )
                external_data_helper.convert_model_to_external_data(
                    model,
                    location=self.external_data_path,
                    all_tensors_to_one_file=util.default(
                        self.all_tensors_to_one_file, True),
                )
        else:
            if self.size_threshold is not None:
                G_LOGGER.warning(
                    "size_threshold is set, but external data path has not been set. "
                    "No external data will be written.")
            if self.all_tensors_to_one_file is not None:
                G_LOGGER.warning(
                    "all_tensors_to_one_file is set, but external data path has not been set. "
                    "No external data will be written.")

        util.makedirs(self.path)
        onnx.save(model, self.path)
        return model
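The `try`/`except TypeError` above is a general pattern for staying compatible with older library versions: attempt the call with the newer keyword argument and retry without it if the installed version rejects it. A generic, self-contained sketch of the same idea (this helper is illustrative, not part of Polygraphy):

    def call_with_optional_kwargs(func, *args, optional_kwargs=None, **kwargs):
        # Try the call with the optional keyword arguments first; fall back without them
        # if the installed version of the library does not accept them.
        if optional_kwargs:
            try:
                return func(*args, **kwargs, **optional_kwargs)
            except TypeError:
                pass
        return func(*args, **kwargs)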
Example #23
    def __init__(self, model, copy=None):
        """
        Args:
            model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]):
                    An ONNX model or a callable that returns one.

            copy (bool): Whether to create a copy of the model first. Defaults to False.
        """
        self._model = model
        self.copy = util.default(copy, False)
Example #24
 def __init__(self, model_required=False, inputs="--inputs", model_type=None, inputs_doc=None):
     super().__init__()
     self._model_required = model_required
     self._inputs = inputs
     # If model type is provided, it means the tool only supports a single type of model.
     self._model_type = model_type
     self._inputs_doc = util.default(
         inputs_doc,
         "Model input(s) and their shape(s). "
         "Used to determine shapes to use while generating input data for inference",
     )
Example #25
    def __init__(self, model, error_ok=None):
        """
        Run shape inference on an ONNX model.

        Args:
            model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model.

            error_ok (bool): Whether errors during shape inference should be suppressed. Defaults to True.
        """
        self._model = model
        self.error_ok = util.default(error_ok, True)
Example #26
    def __init__(self, graph, config=None):
        """
        Creates a TensorFlow session.

        Args:
            graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]):
                    A tuple containing a TensorFlow graph and output names or a callable that returns one.


            config (Union[tf.ConfigProto, Callable() -> tf.ConfigProto]):
                    A TensorFlow ConfigProto or a callable that returns one.
        """
        self.graph = graph
        self.config = util.default(config, CreateConfig())
Example #27
    def __init__(self, graph, config=None):
        """
        Creates a TensorFlow session.

        Args:
            graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                    A callable that can supply a tuple containing a
                    TensorFlow graph and output names.

            config (Callable() -> tf.ConfigProto):
                    A callable that returns a TensorFlow ConfigProto.
        """
        self.graph = graph
        self.config = util.default(config, CreateConfig())
Example #28
    def __init__(self, plugins=None, obj=None):
        """
        Loads plugins from the specified paths.

        Args:
            plugins (List[str]):
                    A list of paths to plugin libraries to load before inference.
            obj (BaseLoader):
                    An object or callable to return or call respectively.
                    If ``obj`` is callable, extra parameters will be forwarded to ``obj``.
                    If ``obj`` is not callable, it will be returned.
        """
        self.plugins = util.default(plugins, [])
        self.obj = obj
Example #29
    def __init__(self, arg_group, deps=None):
        self.deps = util.default(deps, [])

        self.arg_group = arg_group
        self.parser = argparse.ArgumentParser()
        # Make every dependency aware of every dependency (including itself), and register each
        # with the main argument group.
        for dep in self.deps:
            for other_dep in self.deps:
                other_dep.register(dep)
            self.arg_group.register(dep)
        self.arg_group.check_registered()

        # Add all argument groups to the parser so that their command-line flags can be parsed.
        for dep in self.deps:
            dep.add_to_parser(self.parser)
        self.arg_group.add_to_parser(self.parser)
Example #30
        def __init__(self):
            # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this.
            BaseClass.__init__(self)

            self.is_active = False

            self.data_loader = data_loader
            self._cache = cache
            self.device_buffers = OrderedDict()
            self.reset()
            G_LOGGER.verbose("Created calibrator [cache={:}]".format(self._cache))

            self.batch_size = util.default(batch_size, 1)

            # The function that constructed this instance
            self.make_func = Calibrator