Example #1
    def try_permute(arr, shape):
        original_shape = arr.shape

        if sorted(arr.shape) != sorted(shape):
            G_LOGGER.extra_verbose("Array of shape: {:} cannot be permuted to: {:}".format(arr.shape, shape))
            return arr

        # Remove axes from the original shape as they are matched, so that
        # duplicate dimension sizes map to distinct axes in the permutation.
        arr_shape_indices = {index: dimlen for index, dimlen in enumerate(arr.shape)}

        # Find which axis in arr.shape corresponds to the specified size. Never returns duplicates.
        def find_axis(dimlen):
            nonlocal arr_shape_indices
            for index, d in arr_shape_indices.items():
                if d == dimlen:
                    del arr_shape_indices[index]
                    return index

        try:
            perm = [find_axis(dimlen) for dimlen in shape]
            arr = np.transpose(arr, perm)
        except Exception as err:
            G_LOGGER.extra_verbose("Skipping permutation due to {:}".format(err))
        else:
            if arr.shape != original_shape:
                G_LOGGER.info(
                    "Permuted array of shape: {:} to: {:} using permutation {:}".format(original_shape, arr.shape, perm)
                )
        return arr
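
To see how the axis matching behaves in isolation, here is a minimal standalone sketch of the same idea, assuming only numpy (the names here are illustrative, not part of the original module):

import numpy as np

arr = np.zeros((2, 3, 4))
target = (4, 2, 3)

# Unused axes of the source shape: index -> dimension size.
remaining = dict(enumerate(arr.shape))

def find_axis(dimlen):
    # Claim the first unused axis with a matching size, so duplicate
    # dimension sizes map to distinct axes.
    for index, d in remaining.items():
        if d == dimlen:
            del remaining[index]
            return index

perm = [find_axis(d) for d in target]
print(perm)                           # [2, 0, 1]
print(np.transpose(arr, perm).shape)  # (4, 2, 3)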
Example #2
        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes".format(name, buffer),
                    mode=LogMode.ONCE,
                )
            elif np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_):
                imin, imax = self._get_range(name, cast_type=int if np.issubdtype(dtype, np.integer) else bool)
                G_LOGGER.verbose(
                    "Input tensor: {:} | Generating input data in range: [{:}, {:}]".format(name, imin, imax),
                    mode=LogMode.ONCE,
                )
                # high is 1 greater than the max int drawn.
                buffer = rng.randint(low=imin, high=imax + 1, size=shape, dtype=dtype)
            else:
                fmin, fmax = self._get_range(name, cast_type=float)
                G_LOGGER.verbose(
                    "Input tensor: {:} | Generating input data in range: [{:}, {:}]".format(name, fmin, fmax),
                    mode=LogMode.ONCE,
                )
                buffer = (rng.random_sample(size=shape) * (fmax - fmin) + fmin).astype(dtype)

            buffer = np.array(buffer)  # To handle scalars, since the above functions return a float if shape is ().
            return buffer
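
A self-contained sketch of the same dtype dispatch, assuming only numpy; the ranges, the seeded generator, and the function name are illustrative (and the bool branch is omitted for brevity):

import numpy as np

rng = np.random.RandomState(0)

def generate(dtype, shape, imin=1, imax=25, fmin=0.0, fmax=1.0):
    dtype = np.dtype(dtype)
    if np.issubdtype(dtype, np.integer):
        # randint's `high` bound is exclusive, hence the + 1.
        buf = rng.randint(low=imin, high=imax + 1, size=shape, dtype=dtype)
    else:
        # random_sample draws from [0, 1); scale and shift into [fmin, fmax).
        buf = (rng.random_sample(size=shape) * (fmax - fmin) + fmin).astype(dtype)
    # np.array() normalizes the scalar returned when shape is ().
    return np.array(buf)

print(generate(np.int32, (2, 2)))
print(generate(np.float32, ()))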
Example #3
    def __call__(self, args):
        run_results = misc.pickle_load(args.results)

        def meta_from_iter_result(iter_result):
            meta = TensorMetadata()
            for name, arr in iter_result.items():
                meta.add(name, dtype=arr.dtype, shape=arr.shape)
            return meta

        results_str = ""
        results_str += "==== Run Results ({:} runners) ====\n\n".format(
            len(run_results))

        for runner_name, iters in run_results.items():
            results_str += "---- Runner: {:} ({:} iterations) ----\n".format(
                runner_name, len(iters))

            for index, iter_result in enumerate(iters):
                if args.show_values:
                    for name, arr in iter_result.items():
                        results_str += "{:} [dtype={:}, shape={:}]\n{:}\n\n".format(
                            name, arr.dtype, arr.shape,
                            misc.indent_block(str(arr)))
                else:
                    iter_meta = meta_from_iter_result(iter_result)
                    if len(iters) > 1 and args.all:
                        results_str += misc.indent_block(
                            "Iteration: {:} | ".format(index))
                    results_str += "{:}\n".format(iter_meta)

                if not args.all:
                    break
            results_str += "\n"
        results_str = misc.indent_block(results_str, level=0)
        G_LOGGER.info(results_str)
Example #4
    def call_impl(self):
        uff_model, input_names, input_shapes, output_names = self.uff_loader()

        builder = trt.Builder(get_trt_logger())
        network = builder.create_network()
        parser = trt.UffParser()
        # Input names should come from the converter, as a preprocessing script may have been applied to the frozen model.
        for name, shape in zip(input_names, input_shapes):
            # Default order is NCHW, only set to NHWC if we're reasonably certain that it is.
            input_order = self.uff_order
            if not self.uff_order:
                input_order = trt.UffInputOrder.NCHW
                if FormatManager.determine_format(shape) == DataFormat.NHWC:
                    input_order = trt.UffInputOrder.NHWC
            shape = shape[1:]  # Drop the batch dimension; implicit-batch networks register inputs without it.
            G_LOGGER.verbose(
                "Registering UFF input: {:} with shape: {:} and input order: {:}"
                .format(name, shape, input_order))
            parser.register_input(name, shape, input_order)

        if output_names and output_names != constants.MARK_ALL:
            for name in output_names:
                G_LOGGER.verbose("Registering UFF output: " + str(name))
                parser.register_output(name)

        G_LOGGER.info(
            "Parsing UFF model with inputs: {:} and outputs: {:}".format(
                input_names, output_names))
        success = parser.parse_buffer(uff_model, network)
        if not success:
            G_LOGGER.critical("Could not parse UFF correctly")
        return builder, network, parser, input_shapes[0][0]
Example #5
    def call_impl(self):
        """
        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, outputs), _ = util.invoke_if_callable(self._graph)

        if self.path:
            util.save_file(graph.as_graph_def().SerializeToString(),
                           dest=self.path)
        if self.tensorboard_dir:
            G_LOGGER.info("Writing tensorboard events to {:}".format(
                self.tensorboard_dir))
            train_writer = tf.compat.v1.summary.FileWriter(
                self.tensorboard_dir)
            train_writer.add_graph(graph)

        if self.engine_dir is not None:
            graphdef = graph.as_graph_def()
            segment_number = 0
            for node in graphdef.node:
                if node.op == "TRTEngineOp":
                    engine = node.attr["serialized_segment"].s
                    util.save_file(
                        contents=engine,
                        dest=os.path.join(
                            self.engine_dir,
                            "segment-{:}".format(segment_number)))
                    segment_number += 1

        return graph, outputs
Example #6
        def install_mod(raise_error=True):
            modname = name.split(".")[0]
            pkg = _MODULE_TO_PKG_NAME.get(modname, modname)
            extra_flags = list(_MODULE_EXTRA_FLAGS.get(modname, []))  # Copy so the append below doesn't mutate the shared mapping.

            if version == LATEST_VERSION:
                extra_flags.append("--upgrade")
            elif version is not None:
                pkg += version

            cmd = config.INSTALL_CMD + [pkg] + extra_flags
            G_LOGGER.info(
                "{:} is required, but not installed. Attempting to install now.\n"
                "Running: {:}".format(pkg, " ".join(cmd)))
            status = sp.run(cmd)
            if status.returncode != 0:
                G_LOGGER.log(
                    "Could not automatically install required package: {:}. Please install it manually."
                    .format(pkg),
                    severity=G_LOGGER.CRITICAL
                    if raise_error else G_LOGGER.WARNING,
                )

            mod = importlib.import_module(name)
            return mod
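
For reference, the install-then-import pattern reduced to a runnable sketch; `python -m pip install` stands in here for the tool's configured `INSTALL_CMD`, and the module-to-package mapping mirrors the role of `_MODULE_TO_PKG_NAME`:

import importlib
import subprocess as sp
import sys

def install_and_import(name, pkg=None):
    # Try the import first; fall back to installing the distribution package.
    try:
        return importlib.import_module(name)
    except ImportError:
        pkg = pkg or name
        cmd = [sys.executable, "-m", "pip", "install", pkg]
        print("Running: {:}".format(" ".join(cmd)))
        status = sp.run(cmd)
        if status.returncode != 0:
            raise RuntimeError("Could not install required package: {:}".format(pkg))
        return importlib.import_module(name)

# e.g. the `yaml` module lives in the `PyYAML` package:
# yaml = install_and_import("yaml", pkg="PyYAML")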
Example #7
def lazy_write(contents, path, mode="wb"):
    """
    Writes a file to the specified path.

    Args:
        contents (Union[bytes, Callable() -> bytes]):
                Either a bytes-like object that can be written to disk, or a callable which will return such an object.
        path (str): The path to write to.
        mode (str): The mode to use when writing. Defaults to "wb".

    Returns:
        str: The complete file path, or `None` if nothing was written.
    """
    if path is not None:
        dir_path = os.path.dirname(path)
        if dir_path and not os.path.exists(dir_path):
            G_LOGGER.verbose("{:} does not exist, creating now.".format(dir_path))
            os.makedirs(dir_path, exist_ok=True)

        contents, _ = try_call(contents)

        with open(path, mode) as f:
            G_LOGGER.info("Writing to {:}".format(path))
            f.write(contents)
        return path
    return None
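
A brief, hypothetical usage sketch (paths invented; assumes the definition above together with its `G_LOGGER` and `try_call` dependencies in scope). Passing a callable defers serialization until the destination directory has been created:

import pickle

data = {"weights": [1.0, 2.0, 3.0]}

# The callable is only invoked (via try_call) after the directory exists,
# so expensive serialization is never wasted on a bad path.
lazy_write(contents=lambda: pickle.dumps(data), path="out/data.pkl")

# A plain bytes-like object is passed through unchanged.
lazy_write(contents=b"hello", path="out/greeting.bin")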
Example #8
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The ONNX model.
        """
        (graph, output_names), _ = util.invoke_if_callable(self._graph)
        input_names = list(tf_util.get_input_metadata(graph).keys())

        if self.fold_constant:
            G_LOGGER.info(
                "Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
        graphdef = graph.as_graph_def()
        if self.optimize:
            graphdef = tf2onnx.tfonnx.tf_optimize(
                input_names,
                output_names,
                graphdef,
                fold_constant=self.fold_constant)

        with tf.Graph().as_default() as graph, tf.compat.v1.Session(
                graph=graph) as sess:
            tf.import_graph_def(graphdef, name="")

            onnx_graph = tf2onnx.tfonnx.process_tf_graph(
                graph,
                input_names=input_names,
                output_names=output_names,
                opset=self.opset)
            if self.optimize:
                onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
            return onnx_graph.make_model("model")
Example #9
 def display_inputs(input_data):
     inputs_str = ""
     inputs_str += "==== Data ({:} iterations) ====\n".format(
         len(input_data))
     inputs_str += str_from_iters(input_data) + "\n"
     inputs_str = util.indent_block(inputs_str, level=0).strip()
     G_LOGGER.info(inputs_str)
Example #10
def load_file(src, mode="rb", description=None):
    """
    Reads from the specified source path or file-like object.

    Args:
        src (Union[str, file-like]): The path or file-like object to read from.
        mode (str): The mode to use when reading. Defaults to "rb".
        description (str): A description of what is being read.

    Returns:
        Union[str, bytes, None]: The contents read.

    Raises:
        Exception: If the file or file-like object could not be read.
    """
    if description is not None:
        G_LOGGER.info("Loading {:} from {:}".format(description, src))

    if is_file_like(src):
        warn_if_wrong_mode(src, mode)
        # Read from the beginning of the file, then restore the cursor to its previous position.
        prevpos = src.tell()
        if src.seekable():
            src.seek(0)
        contents = src.read()
        if src.seekable():
            src.seek(prevpos)
        return contents
    else:
        with open(src, mode) as f:
            return f.read()
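
The seekable-stream handling can be exercised with an in-memory buffer; a standalone sketch using only the standard library (the helper name is hypothetical):

import io

def read_preserving_cursor(src):
    # Mirror load_file's seekable handling: read from the start, then
    # restore the caller's cursor position.
    prevpos = src.tell()
    if src.seekable():
        src.seek(0)
    contents = src.read()
    if src.seekable():
        src.seek(prevpos)
    return contents

buf = io.BytesIO(b"abcdef")
buf.read(3)                         # A consumer already advanced the cursor.
print(read_preserving_cursor(buf))  # b'abcdef'
print(buf.tell())                   # 3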
Example #11
    def find(self):
        which_layers = {"forward": "first", "reverse": "last"}[self.args.mode]

        num_layers = 0
        # Keep track of what works and what doesn't
        known_good = self.network.num_layers + 1
        known_bad = 0

        indices = None
        while known_good != known_bad and num_layers != known_good:
            with G_LOGGER.indent():
                G_LOGGER.info(
                    "Last known good: {which_layers} {known_good} layer(s) in {precision} precision.\n"
                    "Last known bad: {which_layers} {known_bad} layer(s) in {precision} precision"
                    .format(which_layers=which_layers,
                            known_good=min(known_good,
                                           self.network.num_layers),
                            precision=self.precision,
                            known_bad=known_bad))

            indices = self.layer_indices(num_layers)
            self.mark_layers(indices)
            success = self.check_network("{:}-{:}".format(
                which_layers, num_layers))
            if success:
                # Try something between
                known_good = num_layers
            else:
                known_bad = num_layers
            # Try something in between the known good value, and the known bad value.
            num_layers = math.ceil((known_bad + known_good) / 2.0)

        if known_good <= self.network.num_layers:
            return indices
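
Stripped of the TensorRT specifics, the loop in `find` is an ordinary bisection over a monotonic predicate: if marking k layers to run in high precision passes, fewer might also pass; if it fails, more are needed. A standalone sketch with a made-up layer count and threshold:

import math

NUM_LAYERS = 10
THRESHOLD = 7  # Hypothetical: the check passes once >= 7 layers run in high precision.

def check(num_high_precision):
    return num_high_precision >= THRESHOLD

known_good = NUM_LAYERS + 1  # More layers than exist: assumed to pass.
known_bad = 0                # Zero layers: assumed to fail (else we'd be done).
num = 0
while known_good != known_bad and num != known_good:
    if check(num):
        known_good = num     # Passing: try fewer layers.
    else:
        known_bad = num      # Failing: need more layers.
    # Bisect between the tightest known-bad and known-good counts.
    num = math.ceil((known_bad + known_good) / 2.0)

print(known_good)  # 7 -- the minimal count that still passes.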
Example #12
 def select_layers(self):
     self.iteration += 1
     if self.direction == "forward":
         G_LOGGER.info("Selecting first {:} layer(s) to run in higher precision".format(self.num_layers))
         return range(0, self.num_layers)
     else:
         G_LOGGER.info("Selecting last {:} layer(s) to run in higher precision".format(self.num_layers))
         return range(self.max_layers - self.num_layers, self.max_layers)
Example #13
 def is_not_nan(output):
     nans = np.isnan(output)
     if np.any(nans):
         G_LOGGER.error("NaN Detected | One or more NaNs were encountered in this output")
         G_LOGGER.info("Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs", mode=LogMode.ONCE)
         G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nans))
         return False
     return True
Example #14
def run_subtool(subtool, additional_opts, disable_verbose=False):
    cmd = [sys.executable, polygraphy, subtool] + additional_opts
    if not disable_verbose:
        cmd += ["-vvvvv"]
    G_LOGGER.info("Running command: {:}".format(" ".join(cmd)))
    status = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    check_subprocess(status)
    return status
Example #15
    def export_graph(self, graph, args, do_type_check=True):
        if not args.no_cleanup:
            graph.cleanup()
        if not args.no_toposort:
            graph.toposort()

        G_LOGGER.info("Writing model to: {output}. To see more details about the model, use: polygraphy inspect model {output} --mode=basic".format(output=args.output))
        onnx.save(gs.export_onnx(graph, do_type_check=do_type_check), args.output)
Example #16
 def is_finite(output):
     non_finite = np.logical_not(np.isfinite(output))
     if np.any(non_finite):
         G_LOGGER.error("Inf Detected | One or more non-finite values were encountered in this output")
         G_LOGGER.info("Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values", mode=LogMode.ONCE)
         G_LOGGER.extra_verbose("Note: non-finite values at:\n{:}".format(non_finite))
         G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(output[non_finite]))
         return False
     return True
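
Both checks reduce to plain numpy masks; a quick runnable illustration with invented values (the boolean masks double as the "locations" printed at high verbosity):

import numpy as np

output = np.array([1.0, np.inf, -np.inf, np.nan, 2.0])

nans = np.isnan(output)
non_finite = np.logical_not(np.isfinite(output))

print(np.any(nans))        # True -- at least one NaN present
print(non_finite)          # [False  True  True  True False]
print(output[non_finite])  # [ inf -inf  nan] -- the offending values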
Example #17
def log_output_stats(output,
                     info_hist=False,
                     runner_name=None,
                     hist_range=None):
    ret = str_output_stats(output, runner_name)
    G_LOGGER.info(ret)
    with G_LOGGER.indent():
        # Show histogram on failures.
        G_LOGGER.log(lambda: str_histogram(output, hist_range),
                     severity=G_LOGGER.INFO if info_hist else G_LOGGER.VERBOSE)
Example #18
    def run(self, args):
        import tensorrt as trt

        if not self.makers[TrtLoaderArgs].calibration_cache:
            G_LOGGER.warning(
                "Not using a calibration cache. Using a calibration cache may significantly speed up the search process"
            )

        self.precision = {
            "float32": trt.float32,
            "float16": trt.float16
        }[args.precision]
        if self.precision == trt.float16 and not self.makers[TrtLoaderArgs].fp16:
            self.makers[TrtLoaderArgs].fp16 = True
        if self.precision == trt.float16 and not self.makers[TrtLoaderArgs].int8:
            G_LOGGER.warning(
                "Using float16 as the higher precision, but float16 is also the lowest precision available. Did you mean to set --int8 as well?"
            )

        if not any([
                self.makers[TrtLoaderArgs].tf32,
                self.makers[TrtLoaderArgs].fp16,
                self.makers[TrtLoaderArgs].int8
        ]):
            G_LOGGER.critical(
                "Please enable at least one precision besides float32 (e.g. --int8, --fp16)"
            )

        if self.makers[ModelArgs].model_type == "engine":
            G_LOGGER.critical(
                "The precision tool cannot work with engines, as they cannot be modified. "
                "Please provide a different format, such as an ONNX or TensorFlow model."
            )

        self.args = args

        self.golden = OrderedDict()
        self.golden.update(misc.pickle_load(args.golden))

        self.builder, self.network, self.parser = func.invoke(
            self.makers[TrtLoaderArgs].get_trt_network_loader())
        with self.builder, self.network, self.parser:
            indices = self.find()

        if indices is not None:
            G_LOGGER.info(
                "To achieve acceptable accuracy, try running layers: {:} in {:} precision"
                .format(indices, self.precision))
        else:
            G_LOGGER.critical(
                "Could not find a configuration that resulted in acceptable accuracy"
            )
Example #19
        def display_results(results):
            results_str = ""
            results_str += "==== Run Results ({:} runners) ====\n\n".format(
                len(results))

            for runner_name, iters in results.items():
                results_str += "---- {:35} ({:} iterations) ----\n".format(
                    runner_name, len(iters))
                results_str += str_from_iters(iters) + "\n"

            results_str = util.indent_block(results_str, level=0).strip()
            G_LOGGER.info(results_str)
Example #20
    def __init__(
        self,
        network_loader=None,
        max_workspace_size=None,
        max_batch_size=None,
        fp16=None,
        tf32=None,
        load_engine=None,
        save_engine=None,
        layerwise=False,
        plugins=[],
        name=None,
    ):
        """
        Creates a runner that manages a single TensorRT engine.

        Args:
            network_loader (BaseModelLoader):
                    A loader that returns a TRT builder, network, parser and input shapes.
            max_workspace_size (int): The maximum workspace size.
            max_batch_size (int): The maximum batch size.
            fp16 (bool): Whether to run in fp16 mode.
            tf32 (bool): Whether to run in tf32 mode.
            load_engine (str): A path from which to load a serialized engine, if any.
            save_engine (str): A path at which to save the serialized engine, if any.
            plugins (List[str]): Paths to plugin libraries to load before anything else.
            layerwise (bool): Whether to retrieve the outputs of every layer in the network.
            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        G_LOGGER.warning(
            "TrtLegacyRunner is deprecated, and will be removed in a future release"
        )
        # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization.
        if plugins:
            import ctypes

            for plugin in plugins:
                path = os.path.abspath(plugin)
                G_LOGGER.info("Loading plugin library: {:}".format(path))
                ctypes.CDLL(path)

        # Choose a unique name for this runner.
        super().__init__(name=name, prefix="trt-legacy-runner")

        # Save parameters for activate and deactivate.
        self.network_loader = network_loader
        self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
        self.fp16 = util.default(fp16, False)
        self.tf32 = util.default(tf32, False)
        self.load_engine = load_engine

        self.engine_path = save_engine

        self.layerwise = layerwise
        self.max_batch_size = max_batch_size
Example #21
 def try_reshape(arr, shape):
     original_shape = arr.shape
     try:
         arr = arr.reshape(shape)
     except ValueError:
         G_LOGGER.warning(
             "Could not reshape array from shape: {:} to {:}. Skipping reshape.".format(arr.shape, shape)
         )
     else:
         if arr.shape != original_shape:
             G_LOGGER.info("Reshaped array from shape: {:} to: {:}".format(original_shape, arr.shape))
     return arr
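
A standalone sketch of the same fall-through behavior, with logging omitted: an incompatible shape returns the original array instead of raising.

import numpy as np

def try_reshape(arr, shape):
    # Standalone version: fall back to the original array on failure.
    try:
        return arr.reshape(shape)
    except ValueError:
        return arr

arr = np.arange(6)
print(try_reshape(arr, (2, 3)).shape)  # (2, 3) -- reshape succeeded
print(try_reshape(arr, (4, 4)).shape)  # (6,)   -- incompatible; original kept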
Example #22
    def __call__(self, args):
        graph = super().setup(args)
        config = Config.from_graph(graph)
        config_json = json.dumps(config, indent=constants.TAB)
        G_LOGGER.info(
            "Please do NOT modify the node 'id' values in the configuration file, or things may not work!"
        )

        if args.output:
            with open(args.output, "w") as f:
                f.write(config_json)
        else:
            print(config_json)
Example #23
    def call_impl(self, *args, **kwargs):
        """
        Returns:
            object:
                    The provided ``obj`` argument, or its return value if it is
                    callable. Returns ``None`` if ``obj`` was not set.
        """
        for plugin in self.plugins:
            G_LOGGER.info("Loading plugin library: {:}".format(plugin))
            ctypes.CDLL(plugin)

        ret, _ = util.invoke_if_callable(self.obj, *args, **kwargs)
        return ret
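
ctypes.CDLL loads a shared library and registers its symbols as a side effect, which is all that plugin loading requires here. A runnable sketch using the C standard library (the resolved name varies by platform, and find_library may return None on some systems):

import ctypes
import ctypes.util

# find_library resolves a platform-appropriate name, e.g. "libc.so.6".
path = ctypes.util.find_library("c")
print("Loading library: {:}".format(path))
libc = ctypes.CDLL(path)
print(libc.abs(-5))  # 5 -- symbols are callable once the library is loaded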
Example #24
 def call_impl(self):
     """
     Returns:
         onnx.ModelProto: The ONNX model
     """
     G_LOGGER.info("Loading model: {:}".format(self.path))
     # If external_data_dir is not None, we'll load external data ourselves
     model = onnx.load(self.path,
                       load_external_data=self.external_data_dir is None)
     if self.external_data_dir is not None:
         external_data_helper.load_external_data_for_model(
             model, self.external_data_dir)
     return model
Example #25
def save_file(contents, dest, mode="wb", description=None):
    """
    Writes text or binary data to the specified destination path or file-like object.

    Args:
        contents (bytes):
                A bytes-like object that can be written to disk.
        dest (Union[str, file-like]):
                The path or file-like object to write to.
        mode (str): The mode to use when writing. Defaults to "wb".
        description (str): A description of what is being written.

    Returns:
        Union[str, file-like, None]: The complete file path or file-like object.

    Raises:
        Exception: If the path could not be written to, or if the file-like object could not be written to.
    """
    if description is not None:
        G_LOGGER.info("Saving {:} to {:}".format(description, dest))

    if is_file_like(dest):
        warn_if_wrong_mode(dest, mode)
        bytes_written = dest.write(contents)
        dest.flush()
        try:
            content_bytes = len(contents.encode())
        except AttributeError:  # `contents` is already a bytes-like object.
            content_bytes = len(contents)
        if bytes_written != content_bytes:
            G_LOGGER.warning(
                "Could not write entire file. Note: file contains {:} bytes, but only "
                "{:} bytes were written".format(content_bytes, bytes_written))
    else:
        dir_path = os.path.dirname(dest)
        if dir_path:
            dir_path = os.path.realpath(dir_path)
            if not os.path.exists(dir_path):
                G_LOGGER.verbose(
                    "{:} does not exist, creating now.".format(dir_path))
            os.makedirs(dir_path, exist_ok=True)

        with open(dest, mode) as f:
            f.write(contents)
    return dest
Example #26
    def call_impl(self):
        """
        Returns:
            onnx.ModelProto: The modified ONNX model.
        """
        model = self.load()

        G_LOGGER.info("Converting float tensors to float16")
        try:
            model = onnxmltools.utils.float16_converter.convert_float_to_float16(
                model, keep_io_types=True, disable_shape_inference=True)
        except TypeError:  # Using an old version of onnxmltools
            model = onnxmltools.utils.float16_converter.convert_float_to_float16(
                model)

        return model
Example #27
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata),
                              mode=LogMode.ONCE)

                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                         "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name))

                    G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                                            active_runner.name, TensorMetadata().from_feed_dict(outputs)),
                                  mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                                                        active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
                return iteration_results
Example #28
    def save(self, dest):
        """
        Save this script to the specified destination.

        Args:
            dest (file-like):
                    A file-like object that defines ``write()``, ``isatty``, and has a `name` attribute.
        """
        with dest:
            dest.write(str(self))

            path = dest.name
            # Somehow, piping fools isatty, e.g. `polygraphy run --gen-script - | cat`
            if not dest.isatty() and path not in ["<stdout>", "<stderr>"]:
                G_LOGGER.info("Writing script to: {:}".format(path))
                # Make file executable
                os.chmod(path, os.stat(path).st_mode | 0o111)
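
The final chmod is the standard "add execute bits" idiom: 0o111 (execute for user, group, and other) is ORed into the existing mode. A self-contained sketch with a throwaway file:

import os
import stat
import tempfile

# Write a script, then OR the execute bits into its current permissions,
# i.e. the equivalent of `chmod +x`.
path = os.path.join(tempfile.mkdtemp(), "script.py")
with open(path, "w") as f:
    f.write("#!/usr/bin/env python3\nprint('hi')\n")

os.chmod(path, os.stat(path).st_mode | 0o111)
print(stat.filemode(os.stat(path).st_mode))  # e.g. -rwxr-xr-x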
Example #29
    def fallback_inference(self, onnx_model):
        """
        Run inference with ONNX-Runtime.

        This can be used to retrieve values/shapes/data types for all
        tensors in the model when other shape inference approaches fail.

        Args:
            onnx_model (onnx.ModelProto):
                    The ONNX model in which to infer shapes.

        Returns:
            (OrderedDict[str, np.ndarray], TensorMetadata):
                    1. Mapping of values for all tensors in the model, including inputs.
                        Values are loaded lazily when first accessed so as to save memory.
                    2. Metadata for every tensor in the model.
        """
        from polygraphy.comparator import IterationResult

        with G_LOGGER.verbosity(G_LOGGER.severity + 10):
            load_model = onnx_backend.ModifyOutputs(onnx_model,
                                                    outputs=constants.MARK_ALL,
                                                    copy=True)
            with onnxrt_backend.OnnxrtRunner(
                    onnxrt_backend.SessionFromOnnx(
                        onnx_backend.BytesFromOnnx(load_model))) as runner:
                # We want to set input_metadata only - not user_input_metadata, so that user_input_metadata
                # will be populated by the --model-inputs argument.
                data_loader = self.data_loader_args.get_data_loader()
                data_loader.input_metadata = runner.get_input_metadata()
                feed_dict = data_loader[0]

                with G_LOGGER.verbosity(G_LOGGER.severity - 10):
                    G_LOGGER.info(
                        "Running fallback shape inference using input metadata:\n{:}"
                        .format(TensorMetadata.from_feed_dict(feed_dict)))

                outputs = runner.infer(feed_dict)
                # We include the inputs here so that we have values for all tensors in the model.
                outputs.update(feed_dict)
                # Use IterationResult here since it can handle very large tensors by saving to disk.
                # Layerwise outputs might otherwise take up too much memory.
                return IterationResult(outputs), TensorMetadata.from_feed_dict(
                    outputs)
Example #30
    def call_impl(self):
        """
        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        from tensorflow.contrib import tensorrt as tf_trt

        (graph, output_names), _ = util.invoke_if_callable(self._graph)

        precision_mode = "FP16" if self.fp16 else "FP32"
        precision_mode = "INT8" if self.int8 else precision_mode

        G_LOGGER.info(
            "For TF-TRT, using outputs={:}, max_workspace_size_bytes={:}, max_batch_size={:}, "
            "minimum_segment_size={:}, is_dynamic_op={:}, precision_mode={:}".
            format(
                output_names,
                self.max_workspace_size,
                self.max_batch_size,
                self.minimum_segment_size,
                self.is_dynamic_op,
                precision_mode,
            ))

        graphdef = tf_trt.create_inference_graph(
            graph.as_graph_def(),
            outputs=output_names,
            max_workspace_size_bytes=self.max_workspace_size,
            max_batch_size=self.max_batch_size,
            minimum_segment_size=self.minimum_segment_size,
            is_dynamic_op=self.is_dynamic_op,
            precision_mode=precision_mode,
        )

        segment_number = 0
        for node in graphdef.node:
            if node.op == "TRTEngineOp":
                segment_number += 1
        G_LOGGER.info(
            "Found {:} engines in TFTRT graph".format(segment_number))

        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graphdef, name="")
            return graph, tf_util.get_graph_output_names(graph)