Example #1
    def __call__(self):
        """
        Builds a TensorRT engine.

        Returns:
            trt.ICudaEngine: The engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owning = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            if owning:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                               "Please ensure that they are freed.".format(provided))

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            config, _ = misc.try_call(self._config, builder, network)
            G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))
            engine = builder.build_engine(network, config)
            if not engine:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")

            if hasattr(config.int8_calibrator, "free"):
                config.int8_calibrator.free()

            return engine
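A minimal usage sketch for this loader. The composed-loader names below (EngineFromNetwork, NetworkFromOnnxPath, CreateConfig) are taken from Polygraphy's TensorRT backend and may vary across versions:

    from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath

    # Loaders compose lazily; nothing is parsed or built until the final call.
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"),
                                     config=CreateConfig(fp16=True))
    engine = build_engine()  # Invoking the loader builds the trt.ICudaEngine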
Example #2
    def __call__(self):
        """
        Modifies a TensorRT ``INetworkDefinition``.

        Returns:
            trt.INetworkDefinition: The modified network.
        """
        ret, owns_network = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            if owns_network:
                stack.enter_context(
                    misc.FreeOnException([builder, network, parser]))

            if self.outputs == constants.MARK_ALL:
                trt_util.mark_layerwise(network)
            elif self.outputs is not None:
                trt_util.mark_outputs(network, self.outputs)

            if self.exclude_outputs is not None:
                trt_util.unmark_outputs(network, self.exclude_outputs)

            if parser is None:
                return builder, network
            return builder, network, parser
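A hedged usage sketch for this loader, assuming it is exposed as ModifyNetwork and that constants.MARK_ALL lives in polygraphy.common (both import paths are assumptions; names changed across Polygraphy releases):

    from polygraphy.backend.trt import ModifyNetwork, NetworkFromOnnxPath  # assumed names
    from polygraphy.common import constants  # assumed import path

    # Mark every tensor as a network output, e.g. for layerwise comparison.
    load_network = ModifyNetwork(NetworkFromOnnxPath("model.onnx"),
                                 outputs=constants.MARK_ALL)
    builder, network, parser = load_network()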
Example #3
    def __call__(self):
        """
        Writes out artifacts from a TensorFlow Graph.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, outputs), _ = misc.try_call(self._graph)

        misc.lazy_write(
            contents=lambda: graph.as_graph_def().SerializeToString(),
            path=self.path)
        if self.tensorboard_dir:
            G_LOGGER.info("Writing tensorboard events to {:}".format(
                self.tensorboard_dir))
            train_writer = tf.compat.v1.summary.FileWriter(
                self.tensorboard_dir)
            train_writer.add_graph(graph)

        if self.engine_dir is not None:
            graphdef = graph.as_graph_def()
            segment_number = 0
            for node in graphdef.node:
                if node.op == "TRTEngineOp":
                    engine = node.attr["serialized_segment"].s
                    # self.engine_dir was already checked above; write unconditionally.
                    misc.lazy_write(
                        contents=engine,
                        path=os.path.join(
                            self.engine_dir,
                            "segment-{:}".format(segment_number)))
                    segment_number += 1

        return graph, outputs
Example #4
    def __call__(self):
        """
        Converts a TensorFlow model into ONNX.

        Returns:
            onnx.ModelProto: The ONNX model.
        """
        import tensorflow as tf
        import tf2onnx
        from polygraphy.backend.onnx import util as onnx_util
        from polygraphy.backend.tf import util as tf_util

        misc.log_module_info(tf2onnx)

        (graph, output_names), _ = misc.try_call(self._graph)
        input_names = list(tf_util.get_input_metadata(graph).keys())

        graphdef = graph.as_graph_def()
        if self.optimize:
            # Constant folding happens inside tf_optimize, so only log it when
            # the optimization pass actually runs.
            if self.fold_constant:
                G_LOGGER.info("Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
            graphdef = tf2onnx.tfonnx.tf_optimize(input_names, output_names, graphdef,
                                                  fold_constant=self.fold_constant)

        with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph) as sess:
            tf.import_graph_def(graphdef, name="")

            onnx_graph = tf2onnx.tfonnx.process_tf_graph(graph, input_names=input_names, output_names=output_names, opset=self.opset)
            if self.optimize:
                onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
            return onnx_util.check_model(onnx_graph.make_model("model"))
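A usage sketch chaining a frozen-graph loader into this converter. GraphFromFrozen and OnnxFromTfGraph are the loader names from Polygraphy's tf and onnx backends of this era; exact signatures may differ by version:

    from polygraphy.backend.tf import GraphFromFrozen
    from polygraphy.backend.onnx import OnnxFromTfGraph

    convert = OnnxFromTfGraph(GraphFromFrozen("frozen_model.pb"), opset=11)
    onnx_model = convert()  # Returns an onnx.ModelProto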
Example #5
    def __call__(self):
        """
        Builds an ONNX-Runtime inference session.

        Returns:
            onnxruntime.InferenceSession: The inference session.
        """
        model_bytes, _ = misc.try_call(self._model_bytes)
        return onnxruntime.InferenceSession(model_bytes)
Example #6
    def __call__(self):
        """
        Serializes an ONNX model.

        Returns:
            bytes: The serialized model.
        """
        model, _ = misc.try_call(self._model)
        return model.SerializeToString()
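Examples #5 and #6 compose naturally: serialize a loaded ONNX model to bytes, then hand the bytes to ONNX-Runtime. A sketch assuming the loader names OnnxFromPath, BytesFromOnnx, and SessionFromOnnxBytes (version-dependent):

    from polygraphy.backend.onnx import OnnxFromPath, BytesFromOnnx
    from polygraphy.backend.onnxrt import SessionFromOnnxBytes

    create_session = SessionFromOnnxBytes(BytesFromOnnx(OnnxFromPath("model.onnx")))
    session = create_session()  # onnxruntime.InferenceSession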
Example #7
    def __call__(self):
        """
        Creates a TensorFlow session.

        Returns:
            Tuple[tf.Session, Sequence[str]]: The TensorFlow session, and the names of its outputs.
        """
        config, _ = misc.try_call(self.config)
        (graph, output_names), _ = misc.try_call(self.graph)

        with graph.as_default() as graph, tf.compat.v1.Session(
                graph=graph, config=config).as_default() as sess:
            G_LOGGER.verbose(
                "Using TensorFlow outputs: {:}".format(output_names))
            G_LOGGER.extra_verbose(
                "Initializing variables in TensorFlow Graph")
            sess.run(tf.compat.v1.initializers.global_variables())
            return sess, output_names
Example #8
    def __call__(self):
        """
        Saves an engine to the provided path.

        Returns:
            trt.ICudaEngine: The engine that was saved.
        """
        engine, _ = misc.try_call(self._engine)
        misc.lazy_write(contents=lambda: engine.serialize(), path=self.path)
        return engine
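A sketch of chaining this saver onto an engine builder, assuming a SaveEngine loader that wraps an engine loader plus a destination path (the signature is an assumption):

    from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, SaveEngine

    # The engine is built lazily and serialized to disk when the loader is called.
    save_engine = SaveEngine(EngineFromNetwork(NetworkFromOnnxPath("model.onnx")),
                             path="model.engine")
    engine = save_engine()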
Example #9
    def __call__(self):
        """
        Saves an ONNX model to the specified path.

        Returns:
            onnx.ModelProto: The model, after saving it.
        """
        model, _ = misc.try_call(self._model)
        misc.lazy_write(contents=lambda: model.SerializeToString(), path=self.path)
        return model
Example #10
        def __init__(self, path, explicit_precision=None):
            """
            Functor that parses an ONNX model to create a trt.INetworkDefinition.
            This loader supports models with weights stored in an external location.

            Args:
                path (str): The path from which to load the model.
            """
            from polygraphy.backend.common import BytesFromPath
            # Wrap the path in a lazy byte loader; the file is read only when
            # the resulting loader is invoked.
            load_model = BytesFromPath(path)
            super().__init__(load_model, explicit_precision)
Example #11
    def __call__(self):
        """
        Parses an ONNX model.

        Returns:
            (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser, int):
                    The builder, network, parser, and the first dimension
                    (typically the batch dimension) of the first network input.
        """
        from polygraphy.backend.onnx import util as onnx_util

        builder, network, parser = super().__call__()
        onnx_model, _ = misc.try_call(self.onnx_loader)
        dtype, shape = list(
            onnx_util.get_input_metadata(onnx_model.graph).values())[0]

        parser.parse(onnx_model.SerializeToString())
        trt_util.check_onnx_parser_errors(parser)

        return builder, network, parser, shape[0]
Example #12
    def __call__(self):
        """
        Parses an ONNX model.

        Returns:
            (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser):
                    A TensorRT network, as well as the builder used to create it, and the parser
                    used to populate it.
        """
        builder, network, parser = super().__call__()
        parser.parse(misc.try_call(self._model_bytes)[0])
        trt_util.check_onnx_parser_errors(parser)
        return builder, network, parser
Example #13
    def __call__(self):
        """
        Deserializes an engine from a buffer.

        Returns:
            trt.ICudaEngine: The deserialized engine.
        """
        buffer, _ = misc.try_call(self._serialized_engine)

        trt.init_libnvinfer_plugins(trt_util.TRT_LOGGER, "")
        with trt.Runtime(trt_util.TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(buffer)
            if not engine:
                G_LOGGER.critical("Could not load engine")
        return engine
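This is the counterpart to the savers above: reload a serialized engine from disk. A sketch assuming BytesFromPath (polygraphy.backend.common) and EngineFromBytes (polygraphy.backend.trt):

    from polygraphy.backend.common import BytesFromPath
    from polygraphy.backend.trt import EngineFromBytes

    load_engine = EngineFromBytes(BytesFromPath("model.engine"))
    engine = load_engine()  # trt.ICudaEngine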
Example #14
        def __call__(self):
            """
            Parses an ONNX model from a file.

            Returns:
                (trt.IBuilder, trt.INetworkDefinition, trt.OnnxParser):
                        A TensorRT network, as well as the builder used to create it, and the parser
                        used to populate it.
            """
            builder, network, parser = super().__call__()
            # We need to use parse_from_file for the ONNX parser to keep track of the location of the ONNX file for
            # potentially parsing any external weights.
            parser.parse_from_file(misc.try_call(self.path)[0])
            trt_util.check_onnx_parser_errors(parser)
            return builder, network, parser
Example #15
    def activate_impl(self):
        def make_buffers(engine):
            """
            Creates empty host and device buffers for the specified engine.
            Always uses binding names from Profile 0.
            """
            device_buffers = OrderedDict()
            host_output_buffers = OrderedDict()

            for idx in range(trt_util.get_bindings_per_profile(engine)):
                binding = engine[idx]
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                device_buffers[binding] = cuda.DeviceBuffer(dtype=dtype)
                if not engine.binding_is_input(binding):
                    host_output_buffers[binding] = np.empty(shape=tuple(),
                                                            dtype=dtype)
            G_LOGGER.extra_verbose(
                "Created device buffers: {:}".format(device_buffers))
            return device_buffers, host_output_buffers

        engine_or_context, owning = misc.try_call(self._engine_or_context)

        self.engine, self.owns_engine = None, False
        self.context, self.owns_context = None, False

        if isinstance(engine_or_context, trt.ICudaEngine):
            self.engine = engine_or_context
            self.owns_engine = owning
            self.context = self.engine.create_execution_context()
            if not self.context:
                G_LOGGER.critical(
                    "Invalid Context. See error log for details.")
        elif isinstance(engine_or_context, trt.IExecutionContext):
            self.context = engine_or_context
            self.owns_context = owning
        else:
            G_LOGGER.critical(
                "Invalid Engine or Context. Please ensure the engine was built correctly. See error log for details."
            )

        if not owning:
            G_LOGGER.verbose(
                "Object was provided directly instead of via a Callable. This runner will not assume ownership. "
                "Please ensure it is freed.")

        self.device_buffers, self.host_output_buffers = make_buffers(
            self.context.engine)
        self.stream = cuda.Stream()
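activate_impl is not usually called directly; the runner is used as a context manager, which activates it (allocating the buffers above) and frees everything on exit. A usage sketch assuming the class is TrtRunner with the typical Polygraphy infer(feed_dict=...) API (version-dependent):

    import numpy as np
    from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner

    build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"))
    with TrtRunner(build_engine) as runner:  # __enter__ ends up calling activate_impl
        outputs = runner.infer(feed_dict={"input": np.ones((1, 3, 224, 224), dtype=np.float32)})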
Example #16
    def __call__(self):
        """
        Saves an engine to the provided path.

        Returns:
            trt.ICudaEngine: The engine that was saved.
        """
        engine, owns_engine = misc.try_call(self._engine)

        with contextlib.ExitStack() as stack:
            if owns_engine:
                stack.enter_context(misc.FreeOnException([engine]))

            misc.lazy_write(contents=lambda: engine.serialize(),
                            path=self.path)
            return engine
Example #17
    def __call__(self):
        """
        Modifies a TensorFlow graph.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, outputs), _ = misc.try_call(self._graph)

        if self.outputs == constants.MARK_ALL:
            outputs = list(
                tf_util.get_output_metadata(graph, layerwise=True).keys())
        elif self.outputs is not None:
            outputs = self.outputs

        return graph, outputs
Example #18
    def activate_impl(self):
        # If engine is a callable, then we own the engine
        self.engine, self.owning = misc.try_call(self._engine)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if not self.owning:
            G_LOGGER.verbose(
                "Engine was provided directly instead of via a Callable. This runner will not assume ownership. "
                "Please ensure the engine is freed.")

        self.buffers = Buffers.from_engine(self.engine)
        self.stream = cuda.Stream()

        self.context = self.engine.create_execution_context()
Example #19
    def __call__(self):
        """
        Freezes a TensorFlow graph, and folds constants.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, output_names), _ = misc.try_call(self._graph)
        with tf.compat.v1.Session(graph=graph) as sess:
            sess.run(tf.compat.v1.initializers.global_variables())
            sess.run(tf.compat.v1.initializers.local_variables())

            graphdef = sess.graph.as_graph_def()
            # remove_training_nodes returns a new GraphDef rather than modifying
            # in place; reassign it, or the pruning is silently discarded.
            graphdef = tf.compat.v1.graph_util.remove_training_nodes(graphdef)
            G_LOGGER.ultra_verbose("Removed training nodes from graph")

            for node in graphdef.node:
                if node.op == 'RefSwitch':
                    node.op = 'Switch'
                    for index in range(len(node.input)):
                        if 'moving_' in node.input[index]:
                            node.input[index] = node.input[index] + '/read'
                elif node.op == 'AssignSub':
                    node.op = 'Sub'
                    if 'use_locking' in node.attr: del node.attr['use_locking']
                elif node.op == 'AssignAdd':
                    node.op = 'Add'
                    if 'use_locking' in node.attr: del node.attr['use_locking']
                elif node.op == 'Assign':
                    node.op = 'Identity'
                    if 'use_locking' in node.attr: del node.attr['use_locking']
                    if 'validate_shape' in node.attr:
                        del node.attr['validate_shape']
                    if len(node.input) == 2:
                        # input0: ref: Should be from a Variable node. May be uninitialized.
                        # input1: value: The value to be assigned to the variable.
                        node.input[0] = node.input[1]
                        del node.input[1]

            # Strip port information from outputs
            output_names = [name.split(":")[0] for name in output_names]
            output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, graphdef, output_names)
            output_graph_def = self.constfold(output_graph_def, output_names)
            return func.invoke(GraphFromFrozen(output_graph_def))
Example #20
    def __call__(self):
        """
        Optimizes a TensorFlow model using TF-TRT.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        from tensorflow.contrib import tensorrt as tf_trt

        (graph, output_names), _ = misc.try_call(self._graph)

        precision_mode = "FP16" if self.fp16 else "FP32"
        precision_mode = "INT8" if self.int8 else precision_mode

        G_LOGGER.info(
            "For TF-TRT, using outputs={:}, max_workspace_size_bytes={:}, max_batch_size={:}, "
            "minimum_segment_size={:}, is_dynamic_op={:}, precision_mode={:}".
            format(output_names, self.max_workspace_size, self.max_batch_size,
                   self.minimum_segment_size, self.is_dynamic_op,
                   precision_mode))

        graphdef = tf_trt.create_inference_graph(
            graph.as_graph_def(),
            outputs=output_names,
            max_workspace_size_bytes=self.max_workspace_size,
            max_batch_size=self.max_batch_size,
            minimum_segment_size=self.minimum_segment_size,
            is_dynamic_op=self.is_dynamic_op,
            precision_mode=precision_mode)

        # Count the TRT engine segments that TF-TRT embedded in the graph.
        num_engines = 0
        for node in graphdef.node:
            if node.op == "TRTEngineOp":
                num_engines += 1
        G_LOGGER.info(
            "Found {:} engines in TFTRT graph".format(num_engines))

        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graphdef, name="")
            return graph, tf_util.get_graph_output_names(graph)
Example #21
    def __call__(self):
        """
        Modifies an ONNX model.

        Returns:
            onnx.ModelProto: The modified ONNX model.
        """
        model, _ = misc.try_call(self._model)

        if self.do_shape_inference:
            model = onnx_util.infer_shapes(model)

        if self.outputs == constants.MARK_ALL:
            G_LOGGER.verbose("Marking all ONNX tensors as outputs")
            model = onnx_util.mark_layerwise(model)
        elif self.outputs is not None:
            model = onnx_util.mark_outputs(model, self.outputs)

        if self.exclude_outputs is not None:
            model = onnx_util.unmark_outputs(model, self.exclude_outputs)

        return onnx_util.check_model(model)
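A sketch for trimming an ONNX model's outputs with this loader, assuming it is exposed as ModifyOnnx alongside an OnnxFromPath loader (names from older Polygraphy releases; later versions renamed these):

    from polygraphy.backend.onnx import ModifyOnnx, OnnxFromPath  # assumed names

    # Keep only the named tensor as a graph output.
    modify = ModifyOnnx(OnnxFromPath("model.onnx"), outputs=["intermediate_tensor"])
    model = modify()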
Example #22
    def __call__(self):
        """
        Modifies a TensorRT ``INetworkDefinition``.

        Returns:
            trt.INetworkDefinition: The modified network.
        """
        ret, _ = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        if self.outputs == constants.MARK_ALL:
            trt_util.mark_layerwise(network)
        elif self.outputs is not None:
            trt_util.mark_outputs(network, self.outputs)

        if self.exclude_outputs is not None:
            trt_util.unmark_outputs(network, self.exclude_outputs)

        if parser is not None:
            return builder, network, parser
        else:
            return builder, network
Example #23
    def activate_impl(self):
        self.sess, _ = misc.try_call(self._sess)
Example #24
    def activate_impl(self):
        self.model, _ = misc.try_call(self._model)
        self.model.eval()
Example #25
    def __call__(self, *args, **kwargs):
        if self.plugins:
            trt_util.load_plugins(self.plugins)

        ret, _ = misc.try_call(self.obj, *args, **kwargs)
        return ret
Example #26
    def activate_impl(self):
        self.model, _ = misc.try_call(self._model)

        import onnx_tf
        G_LOGGER.info("Preparing ONNX-TF backend")
        self.tf_rep = onnx_tf.backend.prepare(self.model)
Example #27
    def activate_impl(self):
        import tensorflow as tf
        (self.sess, self.output_names), _ = misc.try_call(self._sess)