Code Example #1
    def attrs_to_dict(attrs):
        attr_dict = OrderedDict()
        for attr in attrs:
            def process_attr(attr_str: str):
                processed = getattr(attr, ONNX_PYTHON_ATTR_MAPPING[attr_str])
                if attr_str == "STRING":
                    processed = processed.decode()
                elif attr_str == "TENSOR":
                    tensor_str = "Tensor: [dtype={:}, shape={:}]".format(get_dtype(processed), get_shape(processed))
                    if mode == "full":
                        tensor_str += " | Values:\n" + misc.indent_block(str(get_values(processed)))
                    processed = tensor_str
                elif attr_str == "GRAPH":
                    processed = "\n" + str_from_onnx_graph(processed, mode, tensors, indent_level=indent_level + 2)
                elif attr_str == "FLOATS" or attr_str == "INTS":
                    # Convert the protobuf repeated field into a normal Python list
                    processed = [p for p in processed]
                elif attr_str == "STRINGS":
                    processed = [p.decode() for p in processed]
                return processed

            if attr.type in ATTR_TYPE_MAPPING:
                attr_str = ATTR_TYPE_MAPPING[attr.type]
                if attr_str in ONNX_PYTHON_ATTR_MAPPING:
                    attr_dict[attr.name] = process_attr(attr_str)
                else:
                    G_LOGGER.warning("Attribute of type {:} is currently unsupported. Skipping attribute.".format(attr_str))
            else:
                G_LOGGER.warning("Attribute type: {:} was not recognized. Was the graph generated with a newer IR "
                                "version than the installed `onnx` package? Skipping attribute.".format(attr.type))
        return attr_dict
Code Example #2
File: runner.py Project: celidos/TensorRT_study
    def __init__(self, sess, timeline_dir=None, name=None):
        """
        Args:
            sess (Callable() -> Tuple[tf.Session, Sequence[str]]):
                    A callable that can supply a tuple containing a
                    TensorFlow session and output names.

            timeline_dir (str):
                    Path to write a TensorFlow timeline.
                    Note that profiling may affect execution time.
            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        super().__init__(name=name, prefix="tf-runner")

        self._sess = sess

        self.timeline_dir = timeline_dir
        self.num_inferences = 0
        self.run_options = None
        self.run_metadata = None
        if self.timeline_dir is not None:
            # Enable profiling
            G_LOGGER.warning(
                "Profiling is enabled. This will impact performance")
            self.run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            self.run_metadata = tf.RunMetadata()
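For context, a minimal usage sketch follows (an assumption on my part: this appears to be the TfRunner constructor from polygraphy.backend.tf, and SessionFromGraph / GraphFromFrozen are companion loaders from that backend; "frozen_model.pb" is a placeholder path):

    # Assumed names; none of these imports appear in the excerpt above.
    from polygraphy.backend.tf import GraphFromFrozen, SessionFromGraph, TfRunner

    # `sess` may be any callable returning (tf.Session, output_names), per the docstring above.
    runner = TfRunner(SessionFromGraph(GraphFromFrozen("frozen_model.pb")),
                      timeline_dir="timelines")  # enables profiling, per the warning in __init__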
Code Example #3
File: runner.py Project: celidos/TensorRT_study
    def infer_impl(self, feed_dict):
        G_LOGGER.extra_verbose("Received feed_dict: {:}".format(feed_dict))
        start = time.time()
        inference_outputs = self.sess.run(self.output_names,
                                          feed_dict=feed_dict,
                                          options=self.run_options,
                                          run_metadata=self.run_metadata)
        end = time.time()

        out_dict = OrderedDict()
        for name, out in zip(self.output_names, inference_outputs):
            out_dict[name] = out
        self.inference_time = end - start

        def generate_timeline():
            from tensorflow.python.client import timeline
            t1 = timeline.Timeline(self.run_metadata.step_stats)
            return t1.generate_chrome_trace_format()

        if self.timeline_dir is not None:
            misc.lazy_write(contents=generate_timeline,
                            path=os.path.join(
                                self.timeline_dir,
                                "run-{:}".format(self.num_inferences)),
                            mode="w")
        self.num_inferences += 1

        return out_dict
Code Example #4
def mark_layerwise(network):
    # Layers within loops cannot be marked as network outputs.
    LOOP_START_NAMES = ["TRIP_LIMIT", "ITERATOR", "RECURRENCE"]
    LOOP_END_NAMES = ["LOOP_OUTPUT"]
    LOOP_START_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_START_NAMES if hasattr(trt.LayerType, attr)]
    LOOP_END_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_END_NAMES if hasattr(trt.LayerType, attr)]
    EXCLUDE_OUTPUT_LAYERS = [trt.LayerType.SHAPE, trt.LayerType.CONSTANT]
    outputs = []
    in_loop = False
    for layer in network:
        if layer.type in LOOP_START_LAYERS:
            G_LOGGER.warning("Loop detected. Please ensure the network is topologically sorted so that layers within "
                             "the loop body are not marked as network outputs in layerwise mode", mode=LogMode.ONCE)
            in_loop = True
        elif layer.type in LOOP_END_LAYERS:
            in_loop = False

        should_mark_layer = not in_loop and layer.type not in EXCLUDE_OUTPUT_LAYERS
        if should_mark_layer:
            for index in range(layer.num_outputs):
                tensor = layer.get_output(index)
                outputs.append(tensor.name)

    G_LOGGER.verbose("Marking {:} tensors as outputs".format(len(outputs)))
    mark_outputs(network, outputs)
Code Example #5
def build_default_profile(builder, network, default_shape_value=None):
    default_shape_value = misc.default_value(default_shape_value, DEFAULT_SHAPE_VALUE)

    def override_shape(shape):
        return tuple([default_shape_value if misc.is_dimension_dynamic(dim) else dim for dim in shape])

    trt_profile = builder.create_optimization_profile()
    for idx in range(network.num_inputs):
        inp = network.get_input(idx)

        with G_LOGGER.verbosity(G_LOGGER.CRITICAL): # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if is_shape_tensor:
            rank = inp.shape[0]
            shape = (default_shape_value, ) * rank
            G_LOGGER.warning("Input shape-tensor: {:24} | Will use input values: {:} in profile.\n"
                             "If this is incorrect, please provide a profile "
                             "that sets the values for this input shape-tensor.".format(inp.name, shape, rank), mode=LogMode.ONCE)
            trt_profile.set_shape_input(inp.name, shape, shape, shape)
        else:
            shape = override_shape(inp.shape)
            if shape != inp.shape:
                G_LOGGER.warning("Input tensor: {:24} | Will use shape: {:} in profile (tensor shape is: {:}).\n"
                                 "If this is incorrect, please provide a profile "
                                 "that sets the shape for this input tensor.".format(inp.name, shape, inp.shape), mode=LogMode.ONCE)
            trt_profile.set_shape(inp.name, shape, shape, shape)
    return check_profile(trt_profile)
Code Example #6
def mark_outputs(network, outputs):
    """
    Mark the specified outputs as network outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    outputs = set(outputs)
    all_outputs = []
    for layer in network:
        for index in range(layer.num_outputs):
            tensor = layer.get_output(index)
            all_outputs.append(tensor.name)
            # Clear all old outputs
            if tensor.is_network_output:
                network.unmark_output(tensor)

            if tensor.name in outputs:
                if not tensor.is_network_output:
                    G_LOGGER.ultra_verbose("Marking {:} as an output".format(tensor.name))
                    network.mark_output(tensor)

    marked_outputs = set(_get_network_outputs(network))
    not_found = outputs - marked_outputs
    check_outputs_not_found(not_found, all_outputs)
Code Example #7
def register_callback():
    from polygraphy.logger.logger import G_LOGGER

    def set_tf_logging_level(sev):
        import os
        import tensorflow as tf

        if sev > G_LOGGER.WARNING:
            tf_sev = tf.compat.v1.logging.ERROR
            tf_logging_level = "3"
        elif sev > G_LOGGER.INFO:
            tf_sev = tf.compat.v1.logging.WARN
            tf_logging_level = "2"
        elif sev > G_LOGGER.VERBOSE:
            tf_sev = tf.compat.v1.logging.INFO
            tf_logging_level = "1"
        else:
            tf_sev = tf.compat.v1.logging.DEBUG
            tf_logging_level = "0"

        tf.compat.v1.logging.set_verbosity(tf_sev)
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = tf_logging_level

    G_LOGGER.register_callback(
        set_tf_logging_level
    )  # Will be registered when this runner is imported.
Code Example #8
File: loader.py Project: celidos/TensorRT_study
    def add_onnx_loader(self, script, disable_outputs=None, suffix=None):
        if self.model_args.model_type == "onnx":
            script.add_import(imports=["OnnxFromPath"],
                              frm="polygraphy.backend.onnx")
            loader_str = Script.invoke("OnnxFromPath",
                                       self.model_args.model_file)
            loader_name = script.add_loader(loader_str,
                                            "load_onnx",
                                            suffix=suffix)
        else:
            if self.tf2onnx_loader_args is None:
                G_LOGGER.critical(
                    "Could not load: {:}. Is it an ONNX model?".format(
                        self.model_args.model_file))
            loader_name = self.tf2onnx_loader_args.add_to_script(script)

        modify_onnx_str = self._get_modify_onnx_str(
            script, loader_name, disable_outputs=disable_outputs)
        if modify_onnx_str is not None:
            loader_name = script.add_loader(modify_onnx_str, "modify_onnx")

        SAVE_ONNX = "SaveOnnx"
        save_onnx_str = Script.invoke(SAVE_ONNX,
                                      loader_name,
                                      path=self.save_onnx)
        if save_onnx_str != Script.invoke(SAVE_ONNX, loader_name):
            script.add_import(imports=[SAVE_ONNX],
                              frm="polygraphy.backend.onnx")
            loader_name = script.add_loader(save_onnx_str, "save_onnx")

        return loader_name
Code Example #9
File: interface.py Project: clayne/TensorRT
    def _check_type(self, elem):
        if not isinstance(elem, self.elem_type):
            G_LOGGER.critical(
                "Unsupported element type in {:}. Element: {:} is type: {:} but type: {:} was expected"
                .format(
                    type(self).__name__, repr(elem),
                    type(elem).__name__, self.elem_type.__name__))
Code Example #10
    def determine_format(shape):
        """
        Guesses the data format of a given shape.

        Args:
            shape (Tuple[int]): The shape, including batch dimension.

        Returns:
            DataFormat: The determined data format.
        """
        # The smaller this ratio, the closer a and b are.
        def minmax_ratio(a, b):
            return abs(max(a, b) / min(a, b))

        # Assume all shapes include batch dimension
        if len(shape) == 4:
            # Typically, H and W are quite close, so if the (C, H) ratio exceeds the (H, W) ratio, we assume CHW ordering.
            if minmax_ratio(shape[1], shape[2]) > minmax_ratio(shape[2], shape[3]):
                return DataFormat.NCHW
            return DataFormat.NHWC
        elif len(shape) == 3:
            return DataFormat.NHW
        elif len(shape) == 2:
            return DataFormat.NW
        else:
            G_LOGGER.warning("Cannot determine format for " + str(shape) +
                ". Currently only implemented for input_buffers with 1-3 non-batch dimensions. Please update this function!")
            return DataFormat.UNKNOWN
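A quick standalone illustration of the ratio heuristic above (not library code; the shapes are made-up examples):

    def minmax_ratio(a, b):
        return abs(max(a, b) / min(a, b))

    # NCHW-like shape: the channel/height ratio (224 / 3, about 74.7) is much larger than
    # the height/width ratio (224 / 224 = 1.0), so the format is guessed as NCHW.
    nchw = (1, 3, 224, 224)
    assert minmax_ratio(nchw[1], nchw[2]) > minmax_ratio(nchw[2], nchw[3])

    # NHWC-like shape: the comparison flips, so the format is guessed as NHWC.
    nhwc = (1, 224, 224, 3)
    assert not (minmax_ratio(nhwc[1], nhwc[2]) > minmax_ratio(nhwc[2], nhwc[3]))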
Code Example #11
    def infer_impl(self, feed_dict):
        start_binding, _ = self.set_shapes_from_feed_dict(feed_dict)

        start = time.time()

        for name, buffer in feed_dict.items():
            self.device_buffers[name].copy_from(buffer, self.stream)

        # Need to offset bindings in case the active profile is not 0.
        status = self.context.execute_async_v2(
            bindings=[0] * start_binding +
            [buf.address() for buf in self.device_buffers.values()],
            stream_handle=self.stream.address())
        if not status:
            G_LOGGER.critical(
                "Model execution failed. Please see the log messages above for details"
            )

        for name, buffer in self.host_output_buffers.items():
            self.host_output_buffers[name] = self.device_buffers[name].copy_to(
                buffer, self.stream)

        self.stream.synchronize()

        end = time.time()
        self.inference_time = end - start

        return self.host_output_buffers
Code Example #12
        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes"
                    .format(name, buffer),
                    mode=LogMode.ONCE)
            elif np.issubdtype(dtype, np.integer):
                # high is 1 greater than the max int drawn
                buffer = rng.randint(low=self.int_range[0],
                                     high=self.int_range[1] + 1,
                                     size=shape,
                                     dtype=dtype)
            elif np.issubdtype(dtype, np.bool_):
                buffer = rng.randint(low=0, high=2, size=shape).astype(dtype)
            else:
                buffer = (rng.random_sample(size=shape) *
                          (self.float_range[1] - self.float_range[0]) +
                          self.float_range[0]).astype(dtype)

            buffer = np.array(
                buffer
            )  # To handle scalars, since the above functions return a float if shape is ().
            return buffer
Code Example #13
File: calibrator.py Project: leo-XUKANG/TensorRT-1
        def write_calibration_cache(self, cache):
            self.cache_contents = cache.tobytes()
            self.has_cached_scales = True

            if self._cache is None:
                return

            try:
                if self._cache.seekable():
                    self._cache.seek(0)
                bytes_written = self._cache.write(self.cache_contents)
                if bytes_written != len(self.cache_contents):
                    G_LOGGER.warning(
                        "Could not write entire cache. Note: cache contains {:} bytes, but only "
                        "{:} bytes were written".format(
                            len(self.cache_contents), bytes_written))
            except AttributeError:
                G_LOGGER.info("Writing calibration cache to: {:}".format(
                    self._cache))
                with open(self._cache, "wb") as f:
                    f.write(self.cache_contents)
            except:
                # Cache is not writable
                return
            else:
                self._cache.flush()
Code Example #14
    def __init__(self, max_workspace_size=None, tf32=None, fp16=None, int8=None, profiles=None, calibrator=None, strict_types=None):
        """
        Functor that creates a TensorRT IBuilderConfig.

        Args:
            max_workspace_size (int): The maximum workspace size, in bytes, when building the engine.
            tf32 (bool): Whether to build the engine with TF32 precision enabled. Defaults to False.
            fp16 (bool): Whether to build the engine with FP16 precision enabled. Defaults to False.
            int8 (bool): Whether to build the engine with INT8 precision enabled. Defaults to False.
            profiles (List[Profile]):
                    A list of optimization profiles to add to the configuration. Only needed for
                    networks with dynamic input shapes. If this is omitted for a network with
                    dynamic shapes, a default profile is created, where dynamic dimensions are
                    replaced with Polygraphy's DEFAULT_SHAPE_VALUE  (defined in util/constants.py).
                    See `Profile` for details.
            calibrator (trt.IInt8Calibrator):
                    An int8 calibrator. Only required in int8 mode when
                    the network does not have explicit precision. For networks with
                    dynamic shapes, the last profile provided (or default profile if
                    no profiles are provided) is used during calibration.
            strict_types (bool):
                    Whether to enable strict type constraints in the builder (trt.BuilderFlag.STRICT_TYPES).
                    Defaults to False.
        """
        self.max_workspace_size = misc.default_value(max_workspace_size, 1 << 24)
        self.tf32 = misc.default_value(tf32, False)
        self.fp16 = misc.default_value(fp16, False)
        self.int8 = misc.default_value(int8, False)
        self.profiles = misc.default_value(profiles, [])
        self.calibrator = calibrator
        self.strict_types = misc.default_value(strict_types, False)

        if self.calibrator is not None and not self.int8:
            G_LOGGER.warning("A calibrator was provided to `CreateConfig`, but int8 mode was not enabled. "
                             "Did you mean to set `int8=True` to enable building with int8 precision?")
Code Example #15
File: trt_legacy.py Project: leo-XUKANG/TensorRT-1
    def infer(self, feed_dict):
        start = time.time()
        for name, buffer in feed_dict.items():
            self.input_buffers[name].device.copy_from(buffer, self.stream)
        # We will not run with smaller batch sizes than whatever the builder chose.
        bindings = [
            buf.device.address() for buf in self.input_buffers.values()
        ] + [buf.device.address() for buf in self.output_buffers.values()]
        status = self.context.execute_async(
            batch_size=self.context.engine.max_batch_size,
            bindings=bindings,
            stream_handle=self.stream.address())
        if not status:
            G_LOGGER.critical(
                "Model execution failed. Please see the log messages above for details"
            )

        for out in self.output_buffers.values():
            out.host = out.device.copy_to(out.host, self.stream)

        self.stream.synchronize()
        end = time.time()

        out_dict = OrderedDict()
        for (name, out) in self.output_buffers.items():
            out_dict[name] = out.host
        self.inference_time = end - start
        return out_dict
Code Example #16
File: calibrator.py Project: leo-XUKANG/TensorRT-1
        def read_calibration_cache(self):
            def load_from_cache():
                if self._cache is None:
                    return None

                try:
                    if self._cache.seekable():
                        self._cache.seek(0)
                    return self._cache.read()
                except AttributeError:
                    if os.path.exists(self._cache):
                        G_LOGGER.info(
                            "Reading calibration cache from: {:}".format(
                                self._cache),
                            mode=LogMode.ONCE)
                        with open(self._cache, "rb") as f:
                            return f.read()
                except:
                    # Cache is not readable
                    return None

            if not self.has_cached_scales:
                self.cache_contents = load_from_cache()
                if not self.cache_contents:
                    G_LOGGER.warning(
                        "Calibration cache was provided, but is empty. Will regenerate scales by running calibration.",
                        mode=LogMode.ONCE)
                    self.cache_contents = None
                else:
                    self.has_cached_scales = True
            return self.cache_contents
Code Example #17
File: reduce.py Project: phongphuhanam/TensorRT
        def fix_graph(graph, model):
            """
            Fix the graph so it is valid ONNX.
            """

            def fix_tensor_metadata(tensors, fix_shape=True):
                for tensor in tensors:
                    if not tensor.shape and fix_shape:
                        tensor.shape = layerwise(model)[tensor.name].shape
                    if not tensor.dtype:
                        tensor.dtype = layerwise(model)[tensor.name].dtype

            fix_tensor_metadata(graph.inputs)
            fix_tensor_metadata(graph.outputs, fix_shape=False)

            # If we're marking inputs, there may be cases where some other inputs are required - for
            # example, if the model is branchy. If, after cleanup(), there are any Variable tensors in
            # the graph without inputs, we'll replace them with constants and fold them away.
            tensor_map = graph.tensors()
            needs_const_fold = False
            for tensor in tensor_map.values():
                if isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs:
                    needs_const_fold = True
                    G_LOGGER.info("Freezing model input: {:}".format(tensor))
                    tensor.to_constant(layerwise(model, include_data=True)[tensor.name])

            if needs_const_fold:
                G_LOGGER.info("Folding constants to remove extraneous subgraphs")
                graph.fold_constants().cleanup()

            return graph
Code Example #18
File: loader.py Project: celidos/TensorRT_study
    def __call__(self):
        """
        Writes out artifacts from a TensorFlow Graph.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, outputs), _ = misc.try_call(self._graph)

        misc.lazy_write(
            contents=lambda: graph.as_graph_def().SerializeToString(),
            path=self.path)
        if self.tensorboard_dir:
            G_LOGGER.info("Writing tensorboard events to {:}".format(
                self.tensorboard_dir))
            train_writer = tf.compat.v1.summary.FileWriter(
                self.tensorboard_dir)
            train_writer.add_graph(graph)

        if self.engine_dir is not None:
            graphdef = graph.as_graph_def()
            segment_number = 0
            for node in graphdef.node:
                if node.op == "TRTEngineOp":
                    engine = node.attr["serialized_segment"].s
                    if self.engine_dir is not None:
                        misc.lazy_write(
                            contents=engine,
                            path=os.path.join(
                                self.engine_dir,
                                "segment-{:}".format(segment_number)))
                    segment_number += 1

        return graph, outputs
Code Example #19
    def add_to_script(self, script):
        script.add_import(imports=["TrtLegacyRunner"], frm="polygraphy.backend.trt_legacy")
        G_LOGGER.warning("Legacy TensorRT runner only supports implicit batch TensorFlow/UFF, ONNX, and Caffe models")

        if self.model_args.model_type == "onnx":
            script.add_import(imports=["ParseNetworkFromOnnxLegacy"], frm="polygraphy.backend.trt_legacy")
            onnx_loader = self.onnx_loader_args.add_onnx_loader(script, disable_outputs=True)
            loader_name = script.add_loader(Script.format_str("ParseNetworkFromOnnxLegacy({:})", onnx_loader), "parse_network_from_onnx_legacy")
        elif self.model_args.model_type == "caffe":
            script.add_import(imports=["LoadNetworkFromCaffe"], frm="polygraphy.backend.trt_legacy")
            loader_name = script.add_loader(Script.format_str("LoadNetworkFromCaffe({:}, {:}, {:}, {:})", self.model_args.model_file, self.caffe_model,
                                                                self.trt_outputs, self.batch_size), "parse_network_from_caffe")
        else:
            script.add_import(imports=["LoadNetworkFromUff"], frm="polygraphy.backend.trt_legacy")
            if self.model_args.model_type == "uff":
                script.add_import(imports=["LoadUffFile"], frm="polygraphy.backend.trt_legacy")
                shapes = {name: shape for name, (_, shape) in self.trt_loader_args.input_shapes.items()}
                loader_name = script.add_loader(Script.format_str("LoadUffFile({:}, {:}, {:})", self.model_args.model_file, misc.default_value(shapes, {}), self.trt_outputs), "load_uff_file")
            else:
                script.add_import(imports=["ConvertToUff"], frm="polygraphy.backend.trt_legacy")
                loader_name = script.add_loader(Script.format_str("ConvertToUff({:}, save_uff={:}, preprocessor={:})", self.tf_loader_args.add_to_script(script), self.save_uff, self.preprocessor), "convert_to_uff")
            loader_name = script.add_loader(Script.format_str("LoadNetworkFromUff({:}, uff_order={:})", loader_name, self.uff_order), "uff_network_loader")


        runner_str = Script.format_str("TrtLegacyRunner({:}, {:}, {:}, fp16={:}, tf32={:}, load_engine={:}, save_engine={:}, layerwise={:}, plugins={:})",
                                        loader_name, self.trt_loader_args.workspace, self.batch_size, self.trt_loader_args.fp16, self.trt_loader_args.tf32,
                                        self.model_args.model_file if self.model_args.model_type == "engine" else None,
                                        self.trt_runner_args.save_engine, self.trt_outputs == constants.MARK_ALL, self.trt_loader_args.plugins)

        runner_name = script.add_loader(runner_str, "trt_legacy_runner")
        script.add_runner(runner_name)
        return runner_name
Code Example #20
    def __call__(self):
        """
        Converts a TensorFlow model into ONNX.

        Returns:
            onnx.ModelProto: The ONNX model.
        """
        import tensorflow as tf
        import tf2onnx
        from polygraphy.backend.tf import util as tf_util

        misc.log_module_info(tf2onnx)

        (graph, output_names), _ = misc.try_call(self._graph)
        input_names = list(tf_util.get_input_metadata(graph).keys())

        if self.fold_constant:
            G_LOGGER.info("Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
        graphdef = graph.as_graph_def()
        if self.optimize:
            graphdef = tf2onnx.tfonnx.tf_optimize(input_names, output_names, graph.as_graph_def(), fold_constant=self.fold_constant)

        with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph) as sess:
            tf.import_graph_def(graphdef, name="")

            onnx_graph = tf2onnx.tfonnx.process_tf_graph(graph, input_names=input_names, output_names=output_names, opset=self.opset)
            if self.optimize:
                onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
            return onnx_util.check_model(onnx_graph.make_model("model"))
Code Example #21
    def add_to_script(self, script, data_loader_name):
        script.add_import(imports=["Comparator"], frm="polygraphy.comparator")
        script.add_import(imports=["sys"])

        RESULTS_VAR_NAME = Inline("results")

        comparator_run = Script.invoke("Comparator.run",
                                       script.get_runners(),
                                       warm_up=self.warm_up,
                                       data_loader=data_loader_name,
                                       use_subprocess=self.use_subprocess,
                                       save_inputs_path=self.save_inputs)
        script.append_suffix(
            Script.format_str("\n# Runner Execution\n{results} = {:}",
                              Inline(comparator_run),
                              results=RESULTS_VAR_NAME))

        if self.save_results:
            G_LOGGER.verbose("Will save runner results to: {:}".format(
                self.save_results))
            script.add_import(imports=["misc"], frm="polygraphy.util")
            script.append_suffix(
                Script.format_str(
                    "\n# Save results\nmisc.pickle_save({:}, {results})",
                    self.save_results,
                    results=RESULTS_VAR_NAME))

        return RESULTS_VAR_NAME
Code Example #22
File: trt_legacy.py Project: leo-XUKANG/TensorRT-1
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceBuffer(shape=shape,
                                               dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:40} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream
Code Example #23
File: util.py Project: celidos/TensorRT_study
    def is_output_node(node):
        # Make sure that we're not using hanging nodes as outputs - must have at least one input.
        if len(node_output_map[node.name]) != 0 or len(node.input) == 0:
            return False

        # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
        EXCLUDE_OPS = [
            "Switch",
            "FusedBatchNorm",
            "Assert",
            "NextIteration",
            "Enter",
            "LoopCond",
            "Exit",
            "Print",
            "Assign",
            "NoOp",
            "ReadVariableOp",
            "VarIsInitializedOp",
            "Const"
            ]

        # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
        EXCLUDE_NAMESPACES = ["while", "Assert"]

        if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any([ns in node.name for ns in EXCLUDE_NAMESPACES]):
            G_LOGGER.extra_verbose("Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                             "(Note: excluded namespaces: {:})".format(node.name, node.op, EXCLUDE_NAMESPACES))
            return False

        return True
Code Example #24
    def parse_dtype(dtype):
        if dtype is not None:
            if dtype not in misc.NP_TYPE_FROM_STR:
                G_LOGGER.critical(
                    "Could not understand data type: {:}. Please use one of: {:} or `auto`"
                    .format(dtype, list(misc.NP_TYPE_FROM_STR.keys())))
            dtype = misc.NP_TYPE_FROM_STR[dtype]
        return dtype
Code Example #25
File: util.py Project: celidos/TensorRT_study
def get_input_metadata(graph):
    input_tensors = []
    input_nodes = find_nodes_by_ops(graph.as_graph_def(), ["Placeholder", "FIFOQueue"])
    G_LOGGER.verbose("Found input tensors: {:}".format(["{:}: {:}".format(n.name, n.op) for n in input_nodes]))
    for node in input_nodes:
        input_tensors.append(graph.get_tensor_by_name(node.name + ":0"))

    G_LOGGER.verbose("Retrieved TensorFlow input_tensors: {:}".format(input_tensors))
    return get_tensor_metadata(input_tensors)
Code Example #26
File: reduce.py Project: phongphuhanam/TensorRT
    def layerwise(model, include_data=False):
        nonlocal _layerwise_outputs, _layerwise_meta
        if _layerwise_outputs is None or _layerwise_meta is None:
            G_LOGGER.info(
                "Running inference with ONNX-Runtime to determine metadata for intermediate tensors.\n"
                "This will cause intermediate models to have static shapes."
            )
            _layerwise_outputs, _layerwise_meta = self.arg_groups[OnnxShapeInferenceArgs].fallback_inference(model)
        return _layerwise_outputs if include_data else _layerwise_meta
Code Example #27
def parse_profile_shapes(default_shapes, min_args, opt_args, max_args):
    """
    Parses TensorRT profile options from command-line arguments.

    Args:
        default_shapes (TensorMetadata): The inference input shapes.

    Returns:
        List[Tuple[OrderedDict[str, Shape]]]:
                A list of profiles, each comprised of three dictionaries
                (min, opt, max) mapping input names to shapes.
    """
    def get_shapes(lst, idx):
        nonlocal default_shapes
        default_shapes = copy.copy(default_shapes)
        if idx < len(lst):
            default_shapes.update(parse_meta(lst[idx], includes_dtype=False))

        # Don't care about dtype, and need to override dynamic dimensions
        shapes = {
            name: misc.override_dynamic_shape(shape)
            for name, (_, shape) in default_shapes.items()
        }

        for name, (_, shape) in default_shapes.items():
            if tuple(shapes[name]) != tuple(shape):
                G_LOGGER.warning(
                    "Input tensor: {:} | For TensorRT profile, overriding shape: {:} to: {:}"
                    .format(name, shape, shapes[name]),
                    mode=LogMode.ONCE)

        return shapes

    num_profiles = max(len(min_args), len(opt_args), len(max_args))

    # For cases where input shapes are provided, we have to generate a profile
    if not num_profiles and default_shapes:
        num_profiles = 1

    profiles = []
    for idx in range(num_profiles):
        min_shapes = get_shapes(min_args, idx)
        opt_shapes = get_shapes(opt_args, idx)
        max_shapes = get_shapes(max_args, idx)
        if sorted(min_shapes.keys()) != sorted(opt_shapes.keys()):
            G_LOGGER.critical(
                "Mismatch in input names between minimum shapes ({:}) and optimum shapes "
                "({:})".format(list(min_shapes.keys()),
                               list(opt_shapes.keys())))
        elif sorted(opt_shapes.keys()) != sorted(max_shapes.keys()):
            G_LOGGER.critical(
                "Mismatch in input names between optimum shapes ({:}) and maximum shapes "
                "({:})".format(list(opt_shapes.keys()),
                               list(max_shapes.keys())))

        profiles.append((min_shapes, opt_shapes, max_shapes))
    return profiles
Code Example #28
File: loader.py Project: celidos/TensorRT_study
    def __call__(self, builder, network):
        """
        Creates a TensorRT IBuilderConfig that can be used by the EngineFromNetwork.

        Args:
            builder (trt.Builder):
                    The TensorRT builder to use to create the configuration.
            network (trt.INetworkDefinition):
                    The TensorRT network for which to create the config. The network is used to
                    automatically create a default optimization profile if none are provided.

        Returns:
            trt.IBuilderConfig: The TensorRT builder configuration.
        """
        with misc.FreeOnException([builder.create_builder_config()]) as (config, ):
            calibration_profile = None
            for profile in self.profiles:
                calibration_profile = trt_util.build_profile(
                    builder, network, profile)
                config.add_optimization_profile(calibration_profile)
            if not self.profiles:
                calibration_profile = trt_util.build_default_profile(
                    builder, network)
                config.add_optimization_profile(calibration_profile)

            if self.profiles:
                G_LOGGER.info("Configuring with profiles: {:}".format(
                    self.profiles))

            config.max_workspace_size = int(self.max_workspace_size)

            if self.strict_types:
                config.set_flag(trt.BuilderFlag.STRICT_TYPES)
            if not self.tf32:
                with contextlib.suppress(AttributeError):
                    config.clear_flag(trt.BuilderFlag.TF32)
            if self.fp16:
                config.set_flag(trt.BuilderFlag.FP16)
            if self.int8:
                config.set_flag(trt.BuilderFlag.INT8)
                if not network.has_explicit_precision:
                    if self.calibrator is not None:
                        input_metadata = trt_util.get_input_metadata_from_profile(
                            calibration_profile, network)
                        with contextlib.suppress(AttributeError):
                            self.calibrator.reset(input_metadata)
                        config.int8_calibrator = self.calibrator
                        with contextlib.suppress(AttributeError):
                            config.set_calibration_profile(calibration_profile)
                    else:
                        G_LOGGER.warning(
                            "Network does not have explicit precision and no calibrator was provided. Please ensure "
                            "that tensors in the network have dynamic ranges set, or provide a calibrator in order to use int8 mode."
                        )
            return config
Code Example #29
    def run(self, command):
        G_LOGGER.info("Running: {:} from cwd: {:}".format(command, self.path))
        env = copy.copy(os.environ)
        env["PYTHONPATH"] = ROOT_DIR
        env["PATH"] = os.path.join(ROOT_DIR, "bin") + os.path.pathsep + env["PATH"]
        # Remove whitespace args and escaped newlines
        command = [arg for arg in command.strip().split(" ") if arg.strip() and arg != "\\\n"]
        status = sp.run(command, cwd=self.path, env=env, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
        assert status.returncode == 0, status.stdout + "\n" + status.stderr
        return status
Code Example #30
File: loader.py Project: celidos/TensorRT_study
    def __call__(self):
        """
        Builds a TensorRT engine.

        Returns:
            trt.ICudaEngine: The engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owns_network = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            if owns_network:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
                G_LOGGER.verbose(
                    "{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                    "Please ensure that they are freed.".format(provided))

            config, owns_config = misc.try_call(self._config, builder, network)
            if owns_config:
                stack.enter_context(config)
            else:
                G_LOGGER.verbose(
                    "Builder configuration was provided directly instead of via a Callable. This loader will not assume "
                    "ownership. Please ensure it is freed.")

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(
                lambda: ("Displaying TensorRT Network:\n" + trt_util.
                         str_from_network(network, mode=network_log_mode)))

            G_LOGGER.info("Building engine with configuration: {:}".format(
                trt_util.str_from_config(config)))

            if misc.version(trt.__version__) < misc.version("7.3"):
                engine = builder.build_engine(network, config)
            else:
                engine = func.invoke(
                    EngineFromBytes(
                        builder.build_serialized_network(network, config)))

            if hasattr(config.int8_calibrator, "free"):
                # Must go before engine check to ensure calibrator is freed on failures too.
                config.int8_calibrator.free()

            if not engine:
                G_LOGGER.critical(
                    "Invalid Engine. Please ensure the engine was built correctly"
                )
            return engine