def __call__(self):
    """
    Builds a TensorRT engine.

    The network source (``self._network``) is resolved with ``misc.try_call``.
    When it was a callable, the returned builder, network and optional parser
    are owned by this loader and are entered as context managers so they are
    released when the build finishes. Otherwise they are left untouched.

    The INT8 calibrator attached to the builder configuration (if it exposes a
    ``free`` method) is released whether or not the build succeeds.

    Returns:
        trt.ICudaEngine: The engine that was created.
    """
    # If network is a callable, then we own its return value
    ret, owning = misc.try_call(self._network)
    builder, network, parser = misc.unpack_args(ret, num=3)

    with contextlib.ExitStack() as stack:
        if owning:
            stack.enter_context(builder)
            stack.enter_context(network)
            if parser is not None:
                stack.enter_context(parser)
        else:
            # The description is only needed for this message, so build it here.
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                             "Please ensure that they are freed.".format(provided))

        network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
        # Passed as a lambda so the string is only built if the logger asks for it.
        G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

        config, _ = misc.try_call(self._config, builder, network)
        try:
            G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))
            engine = builder.build_engine(network, config)
            if not engine:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")
            return engine
        finally:
            # Release the calibrator on every exit path, not only after a
            # successful build, so a failed build does not leak it.
            calibrator = config.int8_calibrator
            if hasattr(calibrator, "free"):
                calibrator.free()
def __call__(self):
    """
    Marks and/or unmarks outputs on a TensorRT ``INetworkDefinition``.

    The network source is resolved with ``misc.try_call``. When this loader
    owns the result, the builder, network and parser are wrapped in
    ``misc.FreeOnException`` for the duration of the modification.

    Returns:
        The builder and the modified network, followed by the parser when one
        was supplied.
    """
    loaded, owns_network = misc.try_call(self._network)
    builder, network, parser = misc.unpack_args(loaded, num=3)

    with contextlib.ExitStack() as stack:
        if owns_network:
            guard = misc.FreeOnException([builder, network, parser])
            stack.enter_context(guard)

        # Either mark every layer, or only the explicitly requested outputs.
        if self.outputs == constants.MARK_ALL:
            trt_util.mark_layerwise(network)
        elif self.outputs is not None:
            trt_util.mark_outputs(network, self.outputs)

        if self.exclude_outputs is not None:
            trt_util.unmark_outputs(network, self.exclude_outputs)

        # Mirror the shape of the input: omit the parser if none was given.
        if parser is not None:
            return builder, network, parser
        return builder, network
def __call__(self):
    """
    Writes out artifacts from a TensorFlow Graph.

    Depending on which attributes are set, this writes:
      - the serialized GraphDef to ``self.path`` (via ``misc.lazy_write``),
      - TensorBoard events for the graph to ``self.tensorboard_dir``,
      - the serialized segment of every ``TRTEngineOp`` node to
        ``self.engine_dir`` as ``segment-<index>``.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    (graph, outputs), _ = misc.try_call(self._graph)

    misc.lazy_write(contents=lambda: graph.as_graph_def().SerializeToString(), path=self.path)

    if self.tensorboard_dir:
        G_LOGGER.info("Writing tensorboard events to {:}".format(self.tensorboard_dir))
        train_writer = tf.compat.v1.summary.FileWriter(self.tensorboard_dir)
        try:
            train_writer.add_graph(graph)
        finally:
            # Close the writer so pending events are flushed to disk and the
            # underlying event file is not left open.
            train_writer.close()

    if self.engine_dir is not None:
        segment_number = 0
        for node in graph.as_graph_def().node:
            if node.op == "TRTEngineOp":
                engine = node.attr["serialized_segment"].s
                segment_path = os.path.join(self.engine_dir, "segment-{:}".format(segment_number))
                misc.lazy_write(contents=engine, path=segment_path)
                segment_number += 1

    return graph, outputs
def __call__(self):
    """
    Converts a TensorFlow model into ONNX.

    The graph is optionally pre-processed with ``tf2onnx.tfonnx.tf_optimize``
    (when ``self.optimize`` is set), re-imported into a fresh ``tf.Graph``,
    converted with ``tf2onnx.tfonnx.process_tf_graph`` and, when
    ``self.optimize`` is set, post-processed with
    ``tf2onnx.optimizer.optimize_graph``.

    Returns:
        onnx.ModelProto: The ONNX model.
    """
    import tensorflow as tf
    import tf2onnx
    from polygraphy.backend.tf import util as tf_util

    misc.log_module_info(tf2onnx)

    (graph, output_names), _ = misc.try_call(self._graph)
    input_names = list(tf_util.get_input_metadata(graph).keys())

    # Serialize the graph once and reuse it for the optional optimization pass.
    graphdef = graph.as_graph_def()
    if self.optimize:
        # Constant folding only happens inside tf_optimize, so only announce it
        # when that pass actually runs.
        if self.fold_constant:
            G_LOGGER.info("Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
        graphdef = tf2onnx.tfonnx.tf_optimize(input_names, output_names, graphdef, fold_constant=self.fold_constant)

    with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph):
        tf.import_graph_def(graphdef, name="")

        onnx_graph = tf2onnx.tfonnx.process_tf_graph(graph, input_names=input_names, output_names=output_names, opset=self.opset)
        if self.optimize:
            onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
        return onnx_util.check_model(onnx_graph.make_model("model"))
def __call__(self):
    """
    Builds an ONNX-Runtime inference session.

    The serialized model is obtained by resolving ``self._model_bytes`` with
    ``misc.try_call``.

    Returns:
        onnxruntime.InferenceSession: The inference session.
    """
    serialized_model, _ = misc.try_call(self._model_bytes)
    session = onnxruntime.InferenceSession(serialized_model)
    return session
def __call__(self):
    """
    Serializes an ONNX model.

    The model is obtained by resolving ``self._model`` with ``misc.try_call``.

    Returns:
        bytes: The serialized model.
    """
    onnx_model, _ = misc.try_call(self._model)
    serialized = onnx_model.SerializeToString()
    return serialized
def __call__(self):
    """
    Creates a TensorFlow session.

    The session is created on the resolved graph with the resolved config,
    and the graph's global variables are initialized before it is returned.

    Returns:
        tf.Session: The TensorFlow session, together with the output names
        that were resolved alongside the graph.
    """
    config, _ = misc.try_call(self.config)
    (graph, output_names), _ = misc.try_call(self.graph)

    with graph.as_default() as graph:
        session = tf.compat.v1.Session(graph=graph, config=config)
        with session.as_default() as sess:
            G_LOGGER.verbose("Using TensorFlow outputs: {:}".format(output_names))
            G_LOGGER.extra_verbose("Initializing variables in TensorFlow Graph")
            init_op = tf.compat.v1.initializers.global_variables()
            sess.run(init_op)
            return sess, output_names
def __call__(self):
    """
    Saves an engine to the provided path.

    Serialization is handed to ``misc.lazy_write`` as a callable rather than
    as already-serialized bytes.

    Returns:
        trt.ICudaEngine: The engine that was saved.
    """
    engine, _ = misc.try_call(self._engine)

    def serialize_engine():
        return engine.serialize()

    misc.lazy_write(contents=serialize_engine, path=self.path)
    return engine
def __call__(self):
    """
    Saves an ONNX model to the specified path.

    Serialization is handed to ``misc.lazy_write`` as a callable rather than
    as already-serialized bytes.

    Returns:
        onnx.ModelProto: The model, after saving it.
    """
    model, _ = misc.try_call(self._model)

    def serialize_model():
        return model.SerializeToString()

    misc.lazy_write(contents=serialize_model, path=self.path)
    return model
def __init__(self, path, explicit_precision=None):
    """
    Functor that parses an ONNX model to create a trt.INetworkDefinition.
    This loader supports models with weights stored in an external location.

    Args:
        path (str): The path from which to load the model.
        explicit_precision: Forwarded unchanged to the parent initializer.
    """
    from polygraphy.backend.common import BytesFromPath

    # Build the byte loader from the ``path`` argument. The previous code read
    # ``self.path`` here, before anything in this initializer had assigned it,
    # and then tried to call the result, ignoring the argument entirely.
    model_path, _ = misc.try_call(path)
    load_model = BytesFromPath(model_path)
    super().__init__(load_model, explicit_precision)
def __call__(self):
    """
    Parses the ONNX model produced by ``self.onnx_loader`` into the network
    created by the parent class.

    Returns:
        The builder, network and parser from the parent class, followed by
        the first element of the shape of the model's first input.
    """
    from polygraphy.backend.onnx import util as onnx_util

    builder, network, parser = super().__call__()
    onnx_model, _ = misc.try_call(self.onnx_loader)

    # Only the first input's metadata is inspected.
    input_metadata = onnx_util.get_input_metadata(onnx_model.graph)
    first_dtype, first_shape = list(input_metadata.values())[0]

    serialized_model = onnx_model.SerializeToString()
    parser.parse(serialized_model)
    trt_util.check_onnx_parser_errors(parser)

    return builder, network, parser, first_shape[0]
def __call__(self):
    """
    Parses an ONNX model.

    Returns:
        (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser):
            A TensorRT network, as well as the builder used to create it, and the parser
            used to populate it.
    """
    builder, network, parser = super().__call__()

    model_bytes = misc.try_call(self._model_bytes)[0]
    parser.parse(model_bytes)
    # Report any errors the parser recorded while parsing.
    trt_util.check_onnx_parser_errors(parser)

    return builder, network, parser
def __call__(self):
    """
    Deserializes an engine from a buffer.

    TensorRT plugins are initialized before deserialization, and a failure
    to produce an engine is reported through ``G_LOGGER.critical``.

    Returns:
        trt.ICudaEngine: The deserialized engine.
    """
    serialized_engine, _ = misc.try_call(self._serialized_engine)

    trt.init_libnvinfer_plugins(trt_util.TRT_LOGGER, "")
    with trt.Runtime(trt_util.TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(serialized_engine)
        if not engine:
            G_LOGGER.critical("Could not load engine")
    return engine
def __call__(self):
    """
    Parses an ONNX model from a file.

    Returns:
        (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser):
            A TensorRT network, as well as the builder used to create it, and the parser
            used to populate it.
    """
    builder, network, parser = super().__call__()

    # parse_from_file (rather than parsing bytes) lets the ONNX parser keep track of
    # where the ONNX file lives, which it needs for locating any external weights.
    model_path = misc.try_call(self.path)[0]
    parser.parse_from_file(model_path)
    trt_util.check_onnx_parser_errors(parser)

    return builder, network, parser
def activate_impl(self):
    """
    Resolves ``self._engine_or_context`` and prepares the runner for inference.

    Sets ``self.engine``/``self.context`` and their ownership flags, allocates
    device and host output buffers and creates a CUDA stream.
    """

    def make_buffers(engine):
        """
        Creates empty host and device buffers for the specified engine.
        Always uses binding names from Profile 0.
        """
        device_bufs = OrderedDict()
        host_out_bufs = OrderedDict()

        num_bindings = trt_util.get_bindings_per_profile(engine)
        for binding_index in range(num_bindings):
            name = engine[binding_index]
            np_dtype = trt.nptype(engine.get_binding_dtype(name))
            device_bufs[name] = cuda.DeviceBuffer(dtype=np_dtype)
            if engine.binding_is_input(name):
                continue
            # Outputs additionally get an (initially empty-shaped) host buffer.
            host_out_bufs[name] = np.empty(shape=tuple(), dtype=np_dtype)

        G_LOGGER.extra_verbose("Created device buffers: {:}".format(device_bufs))
        return device_bufs, host_out_bufs

    engine_or_context, owning = misc.try_call(self._engine_or_context)

    # Start from a clean slate; exactly one of the ownership flags may be set below.
    self.engine, self.owns_engine = None, False
    self.context, self.owns_context = None, False

    if isinstance(engine_or_context, trt.ICudaEngine):
        # An engine was supplied: create the execution context from it.
        self.engine = engine_or_context
        self.owns_engine = owning
        self.context = self.engine.create_execution_context()
        if not self.context:
            G_LOGGER.critical("Invalid Context. See error log for details.")
    elif isinstance(engine_or_context, trt.IExecutionContext):
        # A context was supplied directly.
        self.context = engine_or_context
        self.owns_context = owning
    else:
        G_LOGGER.critical("Invalid Engine or Context. Please ensure the engine was built correctly. See error log for details.")

    if not owning:
        G_LOGGER.verbose("Object was provided directly instead of via a Callable. This runner will not assume ownership. "
                         "Please ensure it is freed.")

    buffers = make_buffers(self.context.engine)
    self.device_buffers, self.host_output_buffers = buffers
    self.stream = cuda.Stream()
def __call__(self):
    """
    Saves an engine to the provided path.

    When this loader owns the engine, it is wrapped in
    ``misc.FreeOnException`` while it is being written.

    Returns:
        trt.ICudaEngine: The engine that was saved.
    """
    engine, owns_engine = misc.try_call(self._engine)

    def serialize_engine():
        return engine.serialize()

    with contextlib.ExitStack() as stack:
        if owns_engine:
            guard = misc.FreeOnException([engine])
            stack.enter_context(guard)

        misc.lazy_write(contents=serialize_engine, path=self.path)
        return engine
def __call__(self):
    """
    Modifies a TensorFlow graph.

    The output names resolved alongside the graph are replaced when
    ``self.outputs`` is set: either with every layerwise output
    (``constants.MARK_ALL``) or with ``self.outputs`` itself.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    (graph, outputs), _ = misc.try_call(self._graph)

    if self.outputs == constants.MARK_ALL:
        layerwise_metadata = tf_util.get_output_metadata(graph, layerwise=True)
        outputs = list(layerwise_metadata.keys())
    elif self.outputs is not None:
        outputs = self.outputs

    return graph, outputs
def activate_impl(self):
    """
    Resolves ``self._engine`` and prepares buffers, a CUDA stream and an
    execution context for it.
    """
    # If engine is a callable, then we own the engine
    resolved = misc.try_call(self._engine)
    self.engine, self.owning = resolved

    if not self.engine:
        G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")

    if not self.owning:
        G_LOGGER.verbose("Engine was provided directly instead of via a Callable. This runner will not assume ownership. "
                         "Please ensure the engine is freed.")

    engine = self.engine
    self.buffers = Buffers.from_engine(engine)
    self.stream = cuda.Stream()
    self.context = engine.create_execution_context()
def __call__(self):
    """
    Freezes a TensorFlow graph, and folds constants.

    Variables are initialized in a session, several reference/assignment ops
    are rewritten in the GraphDef, variables are converted to constants and
    the result is passed through ``self.constfold``.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    (graph, output_names), _ = misc.try_call(self._graph)

    # Assign-style arithmetic ops and the plain op each one is rewritten to.
    arithmetic_replacements = {"AssignSub": "Sub", "AssignAdd": "Add"}

    with tf.Session(graph=graph) as sess:
        sess.run(tf.initializers.global_variables())
        sess.run(tf.initializers.local_variables())

        graphdef = sess.graph.as_graph_def()
        # NOTE(review): the return value is only logged; the loop below keeps
        # operating on `graphdef` itself - confirm this is intended.
        removed = tf.graph_util.remove_training_nodes(graphdef)
        G_LOGGER.ultra_verbose("Removed nodes: {:}".format(removed))

        for node in graphdef.node:
            op = node.op
            if op == "RefSwitch":
                node.op = "Switch"
                for index, input_name in enumerate(node.input):
                    if "moving_" in input_name:
                        node.input[index] = input_name + "/read"
            elif op in arithmetic_replacements:
                node.op = arithmetic_replacements[op]
                if "use_locking" in node.attr:
                    del node.attr["use_locking"]
            elif op == "Assign":
                node.op = "Identity"
                for attr_name in ("use_locking", "validate_shape"):
                    if attr_name in node.attr:
                        del node.attr[attr_name]
                if len(node.input) == 2:
                    # input0: ref: Should be from a Variable node. May be uninitialized.
                    # input1: value: The value to be assigned to the variable.
                    node.input[0] = node.input[1]
                    del node.input[1]

        # Strip port information from outputs
        output_names = [name.split(":")[0] for name in output_names]
        frozen_graphdef = tf.graph_util.convert_variables_to_constants(sess, graphdef, output_names)
        frozen_graphdef = self.constfold(frozen_graphdef, output_names)
        return func.invoke(GraphFromFrozen(frozen_graphdef))
def __call__(self):
    """
    Optimizes a TensorFlow model using TF-TRT.

    The precision mode is ``"INT8"`` when ``self.int8`` is set, otherwise
    ``"FP16"`` when ``self.fp16`` is set, otherwise ``"FP32"``. The number of
    ``TRTEngineOp`` nodes in the converted graph is logged.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    from tensorflow.contrib import tensorrt as tf_trt

    (graph, output_names), _ = misc.try_call(self._graph)

    # INT8 takes precedence over FP16, which takes precedence over FP32.
    if self.int8:
        precision_mode = "INT8"
    elif self.fp16:
        precision_mode = "FP16"
    else:
        precision_mode = "FP32"

    G_LOGGER.info("For TF-TRT, using outputs={:}, max_workspace_size_bytes={:}, max_batch_size={:}, "
                  "minimum_segment_size={:}, is_dynamic_op={:}, precision_mode={:}".format(
                      output_names, self.max_workspace_size, self.max_batch_size,
                      self.minimum_segment_size, self.is_dynamic_op, precision_mode))

    graphdef = tf_trt.create_inference_graph(
        graph.as_graph_def(),
        outputs=output_names,
        max_workspace_size_bytes=self.max_workspace_size,
        max_batch_size=self.max_batch_size,
        minimum_segment_size=self.minimum_segment_size,
        is_dynamic_op=self.is_dynamic_op,
        precision_mode=precision_mode,
    )

    # Only the count is needed, so the serialized segments are not read here.
    num_engines = sum(1 for node in graphdef.node if node.op == "TRTEngineOp")
    G_LOGGER.info("Found {:} engines in TFTRT graph".format(num_engines))

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graphdef, name="")
        return graph, tf_util.get_graph_output_names(graph)
def __call__(self):
    """
    Modifies an ONNX model.

    Optionally runs shape inference, then marks and/or unmarks outputs, and
    finally passes the model through ``onnx_util.check_model``.

    Returns:
        onnx.ModelProto: The modified ONNX model.
    """
    model, _ = misc.try_call(self._model)

    if self.do_shape_inference:
        model = onnx_util.infer_shapes(model)

    mark_everything = self.outputs == constants.MARK_ALL
    if mark_everything:
        G_LOGGER.verbose("Marking all ONNX tensors as outputs")
        model = onnx_util.mark_layerwise(model)
    elif self.outputs is not None:
        model = onnx_util.mark_outputs(model, self.outputs)

    if self.exclude_outputs is not None:
        model = onnx_util.unmark_outputs(model, self.exclude_outputs)

    checked_model = onnx_util.check_model(model)
    return checked_model
def __call__(self):
    """
    Marks and/or unmarks outputs on a TensorRT ``INetworkDefinition``.

    Returns:
        The builder and the modified network, followed by the parser when one
        was supplied.
    """
    loaded, _ = misc.try_call(self._network)
    builder, network, parser = misc.unpack_args(loaded, num=3)

    # Either mark every layer, or only the explicitly requested outputs.
    if self.outputs == constants.MARK_ALL:
        trt_util.mark_layerwise(network)
    elif self.outputs is not None:
        trt_util.mark_outputs(network, self.outputs)

    if self.exclude_outputs is not None:
        trt_util.unmark_outputs(network, self.exclude_outputs)

    # Mirror the shape of the input: omit the parser if none was given.
    if parser is None:
        return builder, network
    return builder, network, parser
def activate_impl(self):
    """
    Resolves ``self._sess`` with ``misc.try_call`` and stores the result as
    ``self.sess``. The second element of the returned pair is not used.
    """
    resolved = misc.try_call(self._sess)
    self.sess, _unused = resolved
def activate_impl(self):
    """
    Resolves ``self._model`` with ``misc.try_call``, stores it as
    ``self.model`` and calls its ``eval()`` method.
    """
    resolved = misc.try_call(self._model)
    self.model, _unused = resolved
    self.model.eval()
def __call__(self, *args, **kwargs):
    """
    Loads the configured plugins (if any), then resolves ``self.obj``.

    Args:
        *args: Positional arguments forwarded to ``misc.try_call``.
        **kwargs: Keyword arguments forwarded to ``misc.try_call``.

    Returns:
        The first element of the pair returned by ``misc.try_call``.
    """
    # Plugins are loaded first, before the wrapped object is resolved.
    if self.plugins:
        trt_util.load_plugins(self.plugins)

    result, _ = misc.try_call(self.obj, *args, **kwargs)
    return result
def activate_impl(self):
    """
    Resolves ``self._model`` and prepares an ONNX-TF backend representation
    of it, stored as ``self.tf_rep``.
    """
    resolved = misc.try_call(self._model)
    self.model, _unused = resolved

    # Imported here, after the model has been resolved, as in the original flow.
    import onnx_tf

    G_LOGGER.info("Preparing ONNX-TF backend")
    backend_rep = onnx_tf.backend.prepare(self.model)
    self.tf_rep = backend_rep
def activate_impl(self):
    """
    Resolves ``self._sess`` with ``misc.try_call`` and stores the resulting
    session and output names as ``self.sess`` and ``self.output_names``.
    """
    import tensorflow as tf

    session_and_names, _unused = misc.try_call(self._sess)
    self.sess, self.output_names = session_and_names