def __init__(self, graph, opset=None, optimize=None, fold_constant=None):
    """
    Converts a TensorFlow model into ONNX.

    Args:
        graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]):
                A tuple containing a TensorFlow graph and output names, or a callable that returns one.
        opset (int): The ONNX opset to use during conversion.
        optimize (bool): Whether to use tf2onnx's graph optimization pass.
        fold_constant (bool):
                Whether to fold constants in the TensorFlow graph.
                Requires that ``optimize`` is also enabled. Defaults to True.
    """
    self._graph = graph
    self.opset = util.default(opset, 11)
    self.fold_constant = util.default(fold_constant, True)
    self.optimize = util.default(optimize, True)

    if self.fold_constant and not self.optimize:
        G_LOGGER.warning(
            "`fold_constant` is enabled, but `optimize` is disabled. Constant folding will not be performed"
        )
def __init__(
    self,
    graph,
    max_workspace_size=None,
    fp16=None,
    int8=None,
    max_batch_size=None,
    is_dynamic_op=False,
    minimum_segment_size=None,
):
    """
    Optimizes a TensorFlow model using TF-TRT.

    Args:
        graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                A callable that can supply a tuple containing a TensorFlow graph and output names.
        max_workspace_size (int): The maximum workspace size.
        fp16 (bool): Whether to run in FP16 mode.
        int8 (bool): Whether to run in INT8 mode.
        max_batch_size (int): The maximum batch size.
        is_dynamic_op (bool): Whether to build TensorRT engines at runtime rather than during conversion.
        minimum_segment_size (int): The minimum number of nodes required for a subgraph to be converted to TF-TRT.
    """
    self._graph = graph
    self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
    self.fp16 = util.default(fp16, False)
    self.int8 = util.default(int8, False)
    self.max_batch_size = util.default(max_batch_size, 1)
    self.is_dynamic_op = is_dynamic_op
    self.minimum_segment_size = util.default(minimum_segment_size, 3)
def __init__(self, model, error_ok=None, external_data_dir=None, save_to_disk_threshold_bytes=None):
    """
    Run shape inference on an ONNX model.

    Args:
        model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]):
                An ONNX model, a callable that returns one, or a path to a model.
                Supports models larger than the 2 GiB protobuf limit.
        error_ok (bool):
                Whether errors during shape inference should be suppressed. Defaults to True.
        external_data_dir (str):
                The directory where external data for the model is stored.
                Only used if the model is provided via a path rather than a loader.
        save_to_disk_threshold_bytes (int):
                The size in bytes above which a ModelProto will be serialized to disk before running
                shape inference. This can be used to work around the 2 GiB protobuf limitation.
                Defaults to ~2 GiB.
    """
    self._model = model
    self.error_ok = util.default(error_ok, True)
    self.external_data_dir = external_data_dir
    # Subtract a little so we're below the real threshold
    self.save_to_disk_threshold_bytes = util.default(save_to_disk_threshold_bytes, (2 << 30) - 8192)
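# Minimal usage sketch. This constructor matches Polygraphy's InferShapes loader;
# the import path and the companion OnnxFromPath loader are assumptions here.
from polygraphy.backend.onnx import InferShapes, OnnxFromPath

# Lazily load a model from disk and run shape inference on it when invoked.
infer_shapes = InferShapes(OnnxFromPath("model.onnx"), error_ok=True)
model_with_shapes = infer_shapes()  # Loaders are callable; invoking one yields the onnx.ModelProto.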
def __init__(self, explicit_precision, explicit_batch=None):
    """
    Args:
        explicit_precision (bool):
                Whether to create the network with explicit precision enabled. Defaults to False.
        explicit_batch (bool):
                Whether to create the network with explicit batch mode enabled. Defaults to True.
    """
    self.explicit_precision = util.default(explicit_precision, False)
    self.explicit_batch = util.default(explicit_batch, True)
def __init__(
    self,
    model,
    num_passes=None,
    do_shape_inference=None,
    partitioning=None,
    fold_shapes=None,
    copy=None,
    error_ok=None,
):
    """
    Fold constants in an ONNX model.

    Args:
        model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]):
                An ONNX model or a callable that returns one.
        num_passes (int):
                The number of constant folding passes to run.
                Sometimes, subgraphs that compute tensor shapes may not be foldable in a single pass.
                By default, Polygraphy will automatically determine the number of passes required.
        do_shape_inference (bool):
                Whether to run shape inference on the model between passes.
                This enables the loader to fold `Shape` nodes.
                Only effective if `fold_shapes` is True. Defaults to True.
        partitioning (Union[str, None]):
                Whether/how to partition the graph so that errors in folding one part of a model
                do not affect other parts. Available modes are:

                - None: Do not partition the graph. If inference fails, no constants are folded.
                - 'basic': Partition the graph. If inference fails in one partition, other partitions
                  will remain unaffected.
                - 'recursive': Partition the graph recursively. If inference fails in a partition,
                  the partition will be further partitioned.

                Defaults to None.
        fold_shapes (bool):
                Whether to fold `Shape` nodes in the graph.
                This requires shapes to be inferred in the graph, and can only fold static shapes.
                Defaults to True.
        copy (bool): Whether to create a copy of the model first. Defaults to False.
        error_ok (bool):
                Whether to suppress errors during constant folding.
                If this is set to `False`, errors will be re-raised. Defaults to True.
    """
    super().__init__(model, copy)
    self.num_passes = num_passes
    self.do_shape_inference = util.default(do_shape_inference, True)
    self.partitioning = partitioning
    self.fold_shapes = util.default(fold_shapes, True)
    self.error_ok = util.default(error_ok, True)
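# Minimal usage sketch. This constructor matches Polygraphy's FoldConstants loader;
# the class name, import path, and OnnxFromPath loader are assumptions here.
from polygraphy.backend.onnx import FoldConstants, OnnxFromPath

# Fold constants with per-partition error isolation, leaving the source model untouched.
fold_constants = FoldConstants(OnnxFromPath("model.onnx"), partitioning="basic", copy=True)
folded_model = fold_constants()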
def __init__(self, shape=None, dtype=None):
    """
    Args:
        shape (Tuple[int]): The initial shape of the buffer.
        dtype (numpy.dtype): The data type of the buffer.
    """
    super().__init__(ptr=0, shape=util.default(shape, tuple()), dtype=util.default(dtype, np.float32))
    self.allocated_nbytes = 0
    self.resize(self.shape)
def __init__(
    self,
    network_loader=None,
    max_workspace_size=None,
    max_batch_size=None,
    fp16=None,
    tf32=None,
    load_engine=None,
    save_engine=None,
    layerwise=False,
    plugins=[],
    name=None,
):
    """
    Creates a runner that manages a single TensorRT engine.

    Args:
        network_loader (BaseModelLoader):
                A loader that returns a TRT builder, network, parser, and input shapes.
        max_workspace_size (int): The maximum workspace size.
        max_batch_size (int): The maximum batch size.
        fp16 (bool): Whether to run in FP16 mode.
        tf32 (bool): Whether to run in TF32 mode.
        load_engine (str): A path to a serialized engine to load.
        save_engine (str): A path at which to save the engine.
        layerwise (bool): Whether to retrieve the outputs of every layer in the network.
        plugins (List[str]): A list of paths to plugin libraries to load before inference.
        name (str):
                The human-readable name prefix to use for this runner.
                A runner count and timestamp will be appended to this prefix.
    """
    G_LOGGER.warning("TrtLegacyRunner is deprecated, and will be removed in a future release")

    # Load any user-supplied plugin libraries. This must happen before everything else,
    # including engine deserialization.
    if plugins:
        import ctypes

        for plugin in plugins:
            path = os.path.abspath(plugin)
            G_LOGGER.info("Loading plugin library: {:}".format(path))
            ctypes.CDLL(path)

    # Choose a unique name for this runner.
    super().__init__(name=name, prefix="trt-legacy-runner")

    # Save parameters for activate and deactivate.
    self.network_loader = network_loader
    self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
    self.fp16 = util.default(fp16, False)
    self.tf32 = util.default(tf32, False)
    self.load_engine = load_engine
    self.engine_path = save_engine
    self.layerwise = layerwise
    self.max_batch_size = max_batch_size
def __init__(self, explicit_precision=None, explicit_batch=None):
    """
    Creates an empty TensorRT network.

    Args:
        explicit_precision (bool):
                Whether to create the network with explicit precision enabled. Defaults to False.
        explicit_batch (bool):
                Whether to create the network with explicit batch mode enabled. Defaults to True.
    """
    self.explicit_precision = util.default(explicit_precision, False)
    self.explicit_batch = util.default(explicit_batch, True)
def default_tuple(tup, default):
    """
    Returns ``default`` if ``tup`` is not a tuple or list; otherwise replaces any None
    elements of ``tup`` with the corresponding element of ``default``.
    """
    if tup is None or not isinstance(tup, (tuple, list)):
        return default

    new_tup = []
    for elem, default_elem in zip(tup, default):
        new_tup.append(util.default(elem, default_elem))
    return tuple(new_tup)
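# Usage sketch for default_tuple (and the util.default pattern used throughout this
# listing): util.default(value, default) is assumed to return `value` unless it is
# None, in which case it returns `default`.
assert default_tuple(None, (1, 1, 1)) == (1, 1, 1)             # Not a tuple/list: fall back entirely.
assert default_tuple((None, 5, None), (1, 1, 1)) == (1, 5, 1)  # Fill only the missing elements.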
def determine_model_type():
    if args_util.get(args, "model_type") is not None:
        return args.model_type.lower()

    if args_util.get(args, "model_file") is None:
        return None

    def use_ext(ext_mapping):
        file_ext = os.path.splitext(args.model_file)[-1]
        if file_ext in ext_mapping:
            return ext_mapping[file_ext]

    runners = util.default(args_util.get(args, "runners"), [])
    if args_util.get(args, "ckpt") or os.path.isdir(args.model_file):
        return "ckpt"
    elif "tf" in runners or "trt_legacy" in runners:
        if args.caffe_model:
            return "caffe"
        return use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING) or "frozen"
    else:
        model_type = use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING)
        if model_type:
            return model_type

    G_LOGGER.critical(
        "Could not automatically determine model type for: {:}\n"
        "Please explicitly specify the type with the --model-type option".format(args.model_file)
    )
def parse_dict_with_default(arg_lst, cast_to=None, sep=None):
    """
    Generate a dictionary from a list of arguments of the form: ``<key>:<val>``.
    If ``<key>`` is empty, the value will be assigned to an empty string key in the returned mapping.

    Args:
        arg_lst (List[str]): The arguments to map.
        cast_to (type):
                The type to cast the values in the map to. By default, uses the type returned by ``cast``.
        sep (str):
                The separator between the key and value strings. Defaults to ":".

    Returns:
        Dict[str, obj]: The mapping.
    """
    sep = util.default(sep, ":")

    if arg_lst is None:
        return

    arg_map = {}
    for arg in arg_lst:
        key, _, val = arg.rpartition(sep)
        val = cast(val)
        if cast_to:
            val = cast_to(val)
        arg_map[key] = val
    return arg_map
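# Usage sketch (assumes `cast` is the module's best-effort string-to-value
# conversion helper, which is not shown in this listing):
print(parse_dict_with_default(["layer1:0.5", "layer2:0.75"], cast_to=float))
# -> {'layer1': 0.5, 'layer2': 0.75}

# An empty key maps the value to the empty string key:
print(parse_dict_with_default([":42"], cast_to=int))
# -> {'': 42}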
def add_loader(self, loader_str, loader_id, suffix=None):
    """
    Adds a loader to the script.
    If the loader is a duplicate, returns the existing loader instead.

    Args:
        loader_str (str):
                A string constructing the loader. For security reasons, this must be generated
                using `make_invocable` or `Script.invoke_if_non_default`.
        loader_id (str): A short human-friendly identifier for the loader.
        suffix (str): A suffix to append to the loader name. Defaults to "".

    Returns:
        str: The name of the loader added.
    """
    suffix = util.default(suffix, "")
    loader_str = ensure_safe(loader_str).unwrap()

    if loader_str in self.loaders:
        return self.loaders[loader_str]

    unique_name = loader_id + suffix
    if self.loader_count[unique_name]:
        unique_name = "{:}_{:}".format(unique_name, self.loader_count[loader_id])
    unique_name = Script.String(unique_name, safe=True, inline=True)

    self.loader_count[loader_id] += 1
    self.loaders[loader_str] = unique_name
    return unique_name
def __init__(self, network, config=None, save_timing_cache=None):
    """
    Builds and serializes a TensorRT engine.

    Args:
        network (Callable() -> Tuple[trt.Builder, trt.INetworkDefinition]):
                A callable capable of returning a TensorRT builder and INetworkDefinition.
                The returned builder and network are owned by EngineFromNetwork and should not be freed manually.
                The callable may have at most 3 return values if another object needs to be kept alive
                for the duration of the network, e.g., in the case of a parser. EngineFromNetwork will take
                ownership of the third return value, and, like the network, it should not be freed by the callable.
                The first and second return values must always be the builder and network respectively.
                If instead of a loader, the network, builder, and optional parser arguments are provided directly,
                then EngineFromNetwork will *not* deallocate them.
        config (Callable(trt.Builder, trt.INetworkDefinition) -> trt.IBuilderConfig):
                A callable that returns a TensorRT builder configuration.
                If not supplied, a `CreateConfig` instance with default parameters is used.
        save_timing_cache (Union[str, file-like]):
                A path or file-like object at which to save a tactic timing cache.
                Any existing cache will be overwritten. Note that if the provided config includes a
                tactic timing cache, the data from that cache will be copied into the new cache.
    """
    self._network = network
    self._config = util.default(config, CreateConfig())
    self.timing_cache_path = save_timing_cache
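# Minimal usage sketch. EngineFromNetwork and CreateConfig are referenced in the
# docstring above; NetworkFromOnnxPath is assumed to be the companion loader that
# parses an ONNX model into a (builder, network, parser) tuple.
from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath

build_engine = EngineFromNetwork(
    NetworkFromOnnxPath("model.onnx"),
    config=CreateConfig(fp16=True),
    save_timing_cache="timing.cache",
)
engine = build_engine()  # The engine is built only when the loader is invoked.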
def __init__(self, model, input_metadata=None, output_metadata=None, check_meta=None):
    """
    Extracts a subgraph from an ONNX model.

    Args:
        model (Union[Union[onnx.ModelProto, onnx_graphsurgeon.Graph], Callable() -> Union[onnx.ModelProto, onnx_graphsurgeon.Graph]]):
                An ONNX model or ONNX-GraphSurgeon Graph, or a callable that returns one.
        input_metadata (TensorMetadata):
                Metadata for the inputs of the subgraph.
                Name, shape, and data type are required.
                If not provided, the graph inputs are not modified.
        output_metadata (TensorMetadata):
                Metadata for the outputs of the subgraph.
                Name and data type are required.
                If not provided, the graph outputs are not modified.
        check_meta (bool):
                Whether to check that the provided input and output metadata include all the expected fields.
                Defaults to True.
    """
    self._model = model
    self.input_metadata = input_metadata
    self.output_metadata = output_metadata
    self.check_meta = util.default(check_meta, True)
def fill_defaults(self, network, default_shape_value=None):
    """
    Fill this profile with sane default values for any bindings whose shapes have not been set explicitly.

    Args:
        network (trt.INetworkDefinition):
                The TensorRT network this profile is meant for.
                This will be used to determine model inputs and their shapes.
        default_shape_value (int): The value to use to override dynamic dimensions.

    Returns:
        Profile: Self
    """
    default_shape_value = util.default(default_shape_value, constants.DEFAULT_SHAPE_VALUE)

    for idx in range(network.num_inputs):
        inp = network.get_input(idx)

        if inp.name in self:
            continue

        with G_LOGGER.verbosity(G_LOGGER.CRITICAL):  # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if is_shape_tensor:
            rank = inp.shape[0]
            shape = (default_shape_value,) * rank
            G_LOGGER.warning(
                "{:} | No values provided; will use input values: {:} for min/opt/max in profile.\n".format(
                    trt_util.str_from_tensor(inp, is_shape_tensor), shape
                ),
                mode=LogMode.ONCE,
            )
            G_LOGGER.warning(
                "This will cause the shape-tensor to have static values. If this is incorrect, please "
                "set the range of values for this input shape-tensor.",
                mode=LogMode.ONCE,
            )
        else:
            shape = util.override_dynamic_shape(inp.shape, default_shape_value)
            if shape != inp.shape:
                G_LOGGER.warning(
                    "{:} | No shapes provided; will use shape: {:} for min/opt/max in profile.\n".format(
                        trt_util.str_from_tensor(inp, is_shape_tensor), shape
                    ),
                    mode=LogMode.ONCE,
                )
                G_LOGGER.warning(
                    "This will cause the tensor to have a static shape. If this is incorrect, please "
                    "set the range of shapes for this input tensor.",
                    mode=LogMode.ONCE,
                )

        self.add(inp.name, shape, shape, shape)
    return self
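# A hedged stand-in illustrating the behavior fill_defaults relies on from
# util.override_dynamic_shape: dynamic (-1) dimensions are replaced with
# default_shape_value, while static dimensions are left untouched.
# A DEFAULT_SHAPE_VALUE of 1 is an assumption for this sketch.
DEFAULT_SHAPE_VALUE = 1

def override_dynamic_shape(shape, default_shape_value=DEFAULT_SHAPE_VALUE):
    return tuple(default_shape_value if dim is None or dim < 0 else dim for dim in shape)

assert override_dynamic_shape((-1, 3, 224, 224)) == (1, 3, 224, 224)
assert override_dynamic_shape((8, 128)) == (8, 128)  # Fully static shapes are unchanged.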
def __init__(self, gpu_memory_fraction=None, allow_growth=None, use_xla=None):
    """
    Creates a TensorFlow config.

    Args:
        gpu_memory_fraction (float):
                The fraction of GPU memory that will be made available to TensorFlow.
                This should be a value between 0.0 and 1.0.
        allow_growth (bool): Whether to allow GPU memory allocated by TensorFlow to grow.
        use_xla (bool): Whether to attempt to enable XLA.
    """
    self.gpu_memory_fraction = util.default(gpu_memory_fraction, 0.9)
    self.allow_growth = util.default(allow_growth, False)
    self.use_xla = util.default(use_xla, False)
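# A minimal sketch of how these options presumably map onto a tf.ConfigProto when the
# loader is invoked (not necessarily the exact implementation used by this class):
import tensorflow as tf

def make_config(gpu_memory_fraction=0.9, allow_growth=False, use_xla=False):
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    config.gpu_options.allow_growth = allow_growth
    if use_xla:
        # Enable XLA JIT compilation for the session.
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
    return config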
def __init__(self, deploy, model, outputs, batch_size=None, dtype=None):
    self.deploy = deploy
    self.model = model
    if not self.model:
        G_LOGGER.warning(
            "No model file provided for Caffe model, random weights will be used. To avoid this, "
            "please set the model parameter, or --model"
        )

    if not outputs:
        G_LOGGER.critical(
            "Please set Caffe model outputs using the outputs parameter, or --trt-outputs. "
            "Note: To determine possible outputs, try running: tail -n50 {:}".format(deploy)
        )

    self.outputs = outputs
    self.dtype = util.default(dtype, trt.float32)
    self.batch_size = util.default(batch_size, 1)
def parse(self, args):
    self.verbosity_count = args_util.get(args, "verbose") - args_util.get(args, "quiet")
    self.silent = args_util.get(args, "silent")
    self.log_format = util.default(args_util.get(args, "log_format"), [])
    self.log_file = args_util.get(args, "log_file")

    # Enable logger settings immediately on parsing.
    self.get_logger()
def __init__(self, model, copy=None):
    """
    Args:
        model (Callable() -> onnx.ModelProto):
                A loader that can supply an ONNX model.
        copy (bool): Whether to create a copy of the model first. Defaults to False.
    """
    self._model = model
    self.copy = util.default(copy, False)
def parse(self, args):
    def determine_model_type():
        if args_util.get(args, "model_type") is not None:
            return args.model_type.lower()

        if args_util.get(args, "model_file") is None:
            return None

        def use_ext(ext_mapping):
            file_ext = os.path.splitext(args.model_file)[-1]
            if file_ext in ext_mapping:
                return ext_mapping[file_ext]

        runners = util.default(args_util.get(args, "runners"), [])
        if args_util.get(args, "ckpt") or os.path.isdir(args.model_file):
            return "ckpt"
        elif "tf" in runners or "trt_legacy" in runners:
            if args.caffe_model:
                return "caffe"
            return use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING) or "frozen"
        else:
            model_type = use_ext(ModelArgs.EXT_MODEL_TYPE_MAPPING)
            if model_type:
                return model_type

        G_LOGGER.exit(
            "Could not automatically determine model type for: {:}\n"
            "Please explicitly specify the type with the --model-type option".format(args.model_file)
        )

    if args_util.get(args, "input_shapes"):
        self.input_shapes = args_util.parse_meta(
            args_util.get(args, "input_shapes"), includes_dtype=False
        )  # TensorMetadata
    else:
        self.input_shapes = TensorMetadata()

    self.model_file = args_util.get(args, "model_file")

    if self.model_file:
        G_LOGGER.verbose("Model: {:}".format(self.model_file))
        if not os.path.exists(self.model_file):
            G_LOGGER.warning("Model path does not exist: {:}".format(self.model_file))
        self.model_file = os.path.abspath(self.model_file)

    model_type_str = util.default(self._model_type, determine_model_type())
    self.model_type = ModelArgs.ModelType(model_type_str) if model_type_str else None

    if self.model_type == "trt-network-script" and (not self.model_file or not self.model_file.endswith(".py")):
        G_LOGGER.exit(
            "TensorRT network scripts must exist and have '.py' extensions. "
            "Note: Provided network script path was: {:}".format(self.model_file)
        )
def wrapper():
    """
    Returns the global Polygraphy CUDA wrapper.

    Returns:
        Cuda: The global CUDA wrapper.
    """
    global G_CUDA
    G_CUDA = util.default(G_CUDA, Cuda())
    return G_CUDA
def call_impl(self):
    """
    Returns:
        onnx.ModelProto: The model, after saving it.
    """
    model, _ = util.invoke_if_callable(self._model)
    G_LOGGER.info("Saving ONNX model to: {:}".format(self.path))

    if self.external_data_path is not None:
        G_LOGGER.verbose("Saving external data for ONNX model to: {:}".format(self.external_data_path))
        try:
            external_data_helper.convert_model_to_external_data(
                model,
                location=self.external_data_path,
                all_tensors_to_one_file=util.default(self.all_tensors_to_one_file, True),
                size_threshold=util.default(self.size_threshold, 1024),
            )
        except TypeError:
            if self.size_threshold is not None:
                G_LOGGER.warning(
                    "This version of onnx does not support size_threshold in convert_model_to_external_data"
                )
            external_data_helper.convert_model_to_external_data(
                model,
                location=self.external_data_path,
                all_tensors_to_one_file=util.default(self.all_tensors_to_one_file, True),
            )
    else:
        if self.size_threshold is not None:
            G_LOGGER.warning(
                "size_threshold is set, but external data path has not been set. "
                "No external data will be written."
            )
        if self.all_tensors_to_one_file is not None:
            G_LOGGER.warning(
                "all_tensors_to_one_file is set, but external data path has not been set. "
                "No external data will be written."
            )

    util.makedirs(self.path)
    onnx.save(model, self.path)
    return model
def __init__(self, model, copy=None):
    """
    Args:
        model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]):
                An ONNX model or a callable that returns one.
        copy (bool): Whether to create a copy of the model first. Defaults to False.
    """
    self._model = model
    self.copy = util.default(copy, False)
def __init__(self, model_required=False, inputs="--inputs", model_type=None, inputs_doc=None):
    super().__init__()
    self._model_required = model_required
    self._inputs = inputs
    # If model type is provided, it means the tool only supports a single type of model.
    self._model_type = model_type
    self._inputs_doc = util.default(
        inputs_doc,
        "Model input(s) and their shape(s). "
        "Used to determine shapes to use while generating input data for inference",
    )
def __init__(self, model, error_ok=None):
    """
    Run shape inference on an ONNX model.

    Args:
        model (Callable() -> onnx.ModelProto):
                A loader that can supply an ONNX model.
        error_ok (bool):
                Whether errors during shape inference should be suppressed. Defaults to True.
    """
    self._model = model
    self.error_ok = util.default(error_ok, True)
def __init__(self, graph, config=None):
    """
    Creates a TensorFlow session.

    Args:
        graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]):
                A tuple containing a TensorFlow graph and output names, or a callable that returns one.
        config (Union[tf.ConfigProto, Callable() -> tf.ConfigProto]):
                A TensorFlow ConfigProto or a callable that returns one.
    """
    self.graph = graph
    self.config = util.default(config, CreateConfig())
def __init__(self, graph, config=None):
    """
    Creates a TensorFlow session.

    Args:
        graph (Callable() -> Tuple[tf.Graph, Sequence[str]]):
                A callable that can supply a tuple containing a TensorFlow graph and output names.
        config (Callable() -> tf.ConfigProto):
                A callable that can supply a TensorFlow ConfigProto. Defaults to ``CreateConfig()``.
    """
    self.graph = graph
    self.config = util.default(config, CreateConfig())
def __init__(self, plugins=None, obj=None):
    """
    Loads plugins from the specified paths.

    Args:
        plugins (List[str]):
                A list of paths to plugin libraries to load before inference.
        obj (BaseLoader):
                An object or callable to return or call respectively.
                If ``obj`` is callable, extra parameters will be forwarded to ``obj``.
                If ``obj`` is not callable, it will be returned.
    """
    self.plugins = util.default(plugins, [])
    self.obj = obj
def __init__(self, arg_group, deps=None):
    self.deps = util.default(deps, [])
    self.arg_group = arg_group
    self.parser = argparse.ArgumentParser()

    for dep in self.deps:
        for other_dep in self.deps:
            other_dep.register(dep)
        self.arg_group.register(dep)
    self.arg_group.check_registered()

    for dep in self.deps:
        dep.add_to_parser(self.parser)
    self.arg_group.add_to_parser(self.parser)
def __init__(self):
    # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this.
    BaseClass.__init__(self)

    self.is_active = False

    self.data_loader = data_loader
    self._cache = cache
    self.device_buffers = OrderedDict()
    self.reset()
    G_LOGGER.verbose("Created calibrator [cache={:}]".format(self._cache))

    self.batch_size = util.default(batch_size, 1)

    # The function that constructed this instance
    self.make_func = Calibrator
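# Minimal usage sketch. The trampoline class above is constructed by the Calibrator
# factory it references; the import path, CreateConfig parameters, and the data
# loader's tensor names/shapes are assumptions for illustration only.
import numpy as np
from polygraphy.backend.trt import Calibrator, CreateConfig

def calib_data():
    # Yield feed_dicts of calibration batches; "input" and its shape are hypothetical.
    for _ in range(4):
        yield {"input": np.ones((1, 3, 224, 224), dtype=np.float32)}

calibrator = Calibrator(data_loader=calib_data(), cache="calib.cache")
config_loader = CreateConfig(int8=True, calibrator=calibrator)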