Example #1
        def get_batch(self, names):
            try:
                host_buffers = next(self.data_loader_iter)
            except StopIteration:
                if not self.num_batches:
                    G_LOGGER.warning(
                        "Calibrator data loader provided no data. Possibilities include: (1) data loader "
                        "has no data to provide, (2) data loader was a generator, and the calibrator is being "
                        "reused across multiple loaders (generators cannot be rewound)"
                    )
                return None
            else:
                self.num_batches += 1

            for name, host_buffer in host_buffers.items():
                if name not in self.device_buffers:
                    self.device_buffers[name] = DeviceBuffer(
                        shape=host_buffer.shape, dtype=host_buffer.dtype)
                    G_LOGGER.verbose("Allocated: {:}".format(
                        self.device_buffers[name]))
                    if self.num_batches > 1:
                        G_LOGGER.warning(
                            "The calibrator data loader provided an extra input ({:}) compared to the last set of inputs.\n"
                            "Should this input be removed, or did you accidentally omit an input before?"
                            .format(name))

                device_buffer = self.device_buffers[name]
                device_buffer.copy_from(host_buffer)
            return [
                device_buffer.address()
                for device_buffer in self.device_buffers.values()
            ]
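
A minimal sketch of how a calibrator driving a get_batch loop like the one above can be attached to an INT8 builder configuration. The data loader, input name, and shapes below are hypothetical, and the Calibrator/CreateConfig loaders are assumed to be available from polygraphy.backend.trt:

import numpy as np
from polygraphy.backend.trt import Calibrator, CreateConfig

# Hypothetical data loader: any iterable/generator of feed_dicts (input name -> NumPy array).
def calib_data():
    for _ in range(8):
        yield {"input": np.random.rand(1, 3, 224, 224).astype(np.float32)}

# Calibrator wraps the data loader and implements get_batch() roughly as shown above.
calibrator = Calibrator(data_loader=calib_data(), cache="calibration.cache")
# CreateConfig is itself a loader; pass it to EngineFromNetwork(..., config=create_config).
create_config = CreateConfig(int8=True, calibrator=calibrator)

Note that a generator can only be consumed once, which is exactly the situation the warning in get_batch guards against when a calibrator is reused across multiple builds.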
Example #2
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceBuffer(shape=shape,
                                               dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:40} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream
Example #3
    def __call__(self):
        """
        Builds a TensorRT engine.

        Returns:
            trt.ICudaEngine: The engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owning = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            if owning:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                               "Please ensure that they are freed.".format(provided))

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            config, _ = misc.try_call(self._config, builder, network)
            G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))
            engine = builder.build_engine(network, config)
            if not engine:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")

            if hasattr(config.int8_calibrator, "free"):
                config.int8_calibrator.free()

            return engine
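
For context, a hedged sketch of how an engine-building loader like this one is usually composed with the other loaders in polygraphy.backend.trt; the loader names are assumed from that module and the file paths are placeholders:

from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, SaveEngine

# Loaders are callables; nothing is parsed or built until the outermost one is invoked.
build_engine = SaveEngine(
    EngineFromNetwork(NetworkFromOnnxPath("model.onnx"),                    # placeholder path
                      config=CreateConfig(max_workspace_size=1 << 30)),
    path="model.engine")                                                    # placeholder path
engine = build_engine()  # parse -> configure -> build -> save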
Example #4
    def __call__(self):
        uff_model, input_names, input_shapes, output_names = self.uff_loader()

        builder = trt.Builder(TRT_LOGGER)
        network = builder.create_network()
        parser = trt.UffParser()
        # Input names should come from the converter, as a preprocessing script may have been applied to the frozen model.
        for name, shape in zip(input_names, input_shapes):
            # Default order is NCHW, only set to NHWC if we're reasonably certain that it is.
            input_order = self.uff_order
            if not self.uff_order:
                input_order = trt.UffInputOrder.NCHW
                if FormatManager.determine_format(shape) == DataFormat.NHWC:
                    input_order = trt.UffInputOrder.NHWC
            shape = shape[1:]
            G_LOGGER.verbose(
                "Registering UFF input: {:} with shape: {:} and input order: {:}"
                .format(name, shape, input_order))
            parser.register_input(name, shape, input_order)

        if output_names and output_names != constants.MARK_ALL:
            for name in output_names:
                G_LOGGER.verbose("Registering UFF output: " + str(name))
                parser.register_output(name)

        G_LOGGER.info(
            "Parsing UFF model with inputs: {:} and outputs: {:}".format(
                input_names, output_names))
        success = parser.parse_buffer(uff_model, network)
        if not success:
            G_LOGGER.critical("Could not parse UFF correctly")
        return builder, network, parser, input_shapes[0][0]
Example #5
def mark_layerwise(network):
    # Layers within loops cannot be marked as network outputs.
    LOOP_START_NAMES = ["TRIP_LIMIT", "ITERATOR", "RECURRENCE"]
    LOOP_END_NAMES = ["LOOP_OUTPUT"]
    LOOP_START_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_START_NAMES if hasattr(trt.LayerType, attr)]
    LOOP_END_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_END_NAMES if hasattr(trt.LayerType, attr)]
    EXCLUDE_OUTPUT_LAYERS = [trt.LayerType.SHAPE, trt.LayerType.CONSTANT]
    outputs = []
    in_loop = False
    for layer in network:
        if layer.type in LOOP_START_LAYERS:
            G_LOGGER.warning("Loop detected. Please ensure the network is topologically sorted so that layers within "
                             "the loop body are not marked as network outputs in layerwise mode", mode=LogMode.ONCE)
            in_loop = True
        elif layer.type in LOOP_END_LAYERS:
            in_loop = False

        should_mark_layer = not in_loop and layer.type not in EXCLUDE_OUTPUT_LAYERS
        if should_mark_layer:
            for index in range(layer.num_outputs):
                tensor = layer.get_output(index)
                outputs.append(tensor.name)

    G_LOGGER.verbose("Marking {:} tensors as outputs".format(len(outputs)))
    mark_outputs(network, outputs)
Example #6
def mark_layerwise(network):
    # Layers within loops cannot be marked as network outputs.
    LOOP_START_NAMES = ["TRIP_LIMIT", "ITERATOR"]
    LOOP_END_NAMES = ["LOOP_OUTPUT"]
    LOOP_START_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_START_NAMES if hasattr(trt.LayerType, attr)]
    LOOP_END_LAYERS = [getattr(trt.LayerType, attr) for attr in LOOP_END_NAMES if hasattr(trt.LayerType, attr)]
    EXCLUDE_OUTPUT_LAYERS = [trt.LayerType.SHAPE, trt.LayerType.CONSTANT]
    num_tensors_marked = 0
    in_loop = False
    for layer in network:
        if layer.type in LOOP_START_LAYERS:
            G_LOGGER.warning("Loop detected. Please ensure the network is topologically sorted so that layers within "
                             "the loop body are not marked as network outputs in layerwise mode")
            in_loop = True
        elif layer.type in LOOP_END_LAYERS:
            in_loop = False

        def should_mark_layer():
            return not in_loop and layer.type not in EXCLUDE_OUTPUT_LAYERS

        if should_mark_layer():
            for index in range(layer.num_outputs):
                tensor = layer.get_output(index)
                if not tensor.is_network_output:
                    G_LOGGER.verbose("Marking {:} as an output".format(tensor.name))
                    network.mark_output(tensor)
                    num_tensors_marked += 1
    G_LOGGER.verbose("Marking {:} tensors as outputs".format(num_tensors_marked))
Example #7
    def add_to_script(self, script, data_loader_name):
        script.add_import(imports=["Comparator"], frm="polygraphy.comparator")
        script.add_import(imports=["sys"])

        RESULTS_VAR_NAME = Inline("results")

        comparator_run = Script.invoke("Comparator.run",
                                       script.get_runners(),
                                       warm_up=self.warm_up,
                                       data_loader=data_loader_name,
                                       use_subprocess=self.use_subprocess,
                                       save_inputs_path=self.save_inputs)
        script.append_suffix(
            Script.format_str("\n# Runner Execution\n{results} = {:}",
                              Inline(comparator_run),
                              results=RESULTS_VAR_NAME))

        if self.save_results:
            G_LOGGER.verbose("Will save runner results to: {:}".format(
                self.save_results))
            script.add_import(imports=["misc"], frm="polygraphy.util")
            script.append_suffix(
                Script.format_str(
                    "\n# Save results\nmisc.pickle_save({:}, {results})",
                    self.save_results,
                    results=RESULTS_VAR_NAME))

        return RESULTS_VAR_NAME
Example #8
    def set_shapes_from_feed_dict(self, feed_dict):
        """
        Sets context shapes according to the provided feed_dict, then resizes
        buffers as needed.

        Args:
            feed_dict (OrderedDict[str, numpy.ndarray]): A mapping of input tensor names to corresponding input NumPy arrays.

        Returns:
            Tuple[int, int]: The start and end binding indices of the modified bindings.
        """
        def is_dynamic_shape_input(binding):
            try:
                self.context.engine.get_profile_shape_input(0, binding)
                return True
            except RuntimeError:
                return False

        start_binding, end_binding = trt_util.get_active_profile_bindings(
            self.context)
        for name, inp in feed_dict.items():
            binding = start_binding + self.context.engine[name]
            shape = inp.shape
            # Only set shapes if required.
            # get_shape/get_binding_shape will return what a shape input/data input is currently set to.
            if is_dynamic_shape_input(binding):  # For input shape tensors
                G_LOGGER.verbose(
                    "Setting shape binding: {:} (index: {:}) to: {:}".format(
                        name, binding, inp))
                if tuple(self.context.get_shape(binding)) != tuple(inp):
                    self.context.set_shape_input(binding, inp)

            elif misc.is_shape_dynamic(
                    self.context.engine.get_binding_shape(binding)):
                G_LOGGER.verbose(
                    "Setting binding: {:} (index: {:}) to shape: {:}".format(
                        name, binding, shape))
                if tuple(self.context.get_binding_shape(binding)) != tuple(
                        shape):
                    self.context.set_binding_shape(binding, shape)

        if not self.context.all_binding_shapes_specified:
            G_LOGGER.critical(
                "Some input shapes were not specified.\nNote: Network inputs are: {:}"
                .format(self.get_input_metadata()))
        if not self.context.all_shape_inputs_specified:
            G_LOGGER.critical(
                "Some shape inputs were not specified.\nNote: Network inputs are: {:}"
                .format(self.get_input_metadata()))

        # Resize device buffers - host buffers will be automatically resized by copy_to
        for binding in range(start_binding, end_binding):
            name = self.context.engine[
                binding -
                start_binding]  # Use profile 0 binding names for all buffers.
            shape = tuple(self.context.get_binding_shape(binding))
            self.device_buffers[name].resize(shape)

        return start_binding, end_binding
Example #9
def get_input_metadata(graph):
    input_tensors = []
    input_nodes = find_nodes_by_ops(graph.as_graph_def(), ["Placeholder", "FIFOQueue"])
    G_LOGGER.verbose("Found input tensors: {:}".format(["{:}: {:}".format(n.name, n.op) for n in input_nodes]))
    for node in input_nodes:
        input_tensors.append(graph.get_tensor_by_name(node.name + ":0"))

    G_LOGGER.verbose("Retrieved TensorFlow input_tensors: {:}".format(input_tensors))
    return get_tensor_metadata(input_tensors)
Example #10
        def __init__(self):
            # Must explicitly initialize parent for any trampoline class! Will mysteriously segfault without this.
            BaseClass.__init__(self)

            self.data_loader = data_loader
            self._cache = cache
            self.device_buffers = OrderedDict()
            self.reset()
            G_LOGGER.verbose("Created calibrator [cache={:}]".format(self._cache))

            self.batch_size = misc.default_value(batch_size, 1)
Example #11
    def __call__(self):
        """
        Loads an ONNX model from a file.

        Returns:
            onnx.ModelProto: The ONNX model
        """
        import onnx
        misc.log_module_info(onnx)

        G_LOGGER.verbose("Loading ONNX model: {:}".format(self.path))
        return onnx_util.check_model(onnx.load(self.path))
Example #12
def check_model(model):
    try:
        import onnx
    except ImportError:
        G_LOGGER.warning("Could not import onnx module, skipping model check")
        return model

    try:
        onnx.checker.check_model(model)
        G_LOGGER.verbose("ONNX Checker Passed")
    except onnx.checker.ValidationError as err:
        G_LOGGER.warning("ONNX Checker exited with an error:\n{:}".format(err))
    finally:
        return model
Example #13
def infer_shapes(model):
    try:
        import onnx.shape_inference
    except ImportError:
        G_LOGGER.warning("Could not import onnx.shape_inference module, skipping shape inference")
        return model

    try:
        model = onnx.shape_inference.infer_shapes(model)
        G_LOGGER.verbose("ONNX Shape Inference completed successfully")
    except Exception as err:
        G_LOGGER.warning("ONNX shape inference exited with an error:\n{:}".format(err))
    finally:
        return model
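
The same checks can also be run directly against the onnx package, outside these wrappers; a small standalone sketch (the model paths are placeholders):

import onnx
import onnx.shape_inference

model = onnx.load("model.onnx")               # placeholder path
onnx.checker.check_model(model)               # raises onnx.checker.ValidationError on failure
model = onnx.shape_inference.infer_shapes(model)
onnx.save(model, "model_inferred.onnx")       # placeholder path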
Example #14
 def default_find_output_func(output_name, index, iter_result):
     found_name = misc.find_in_dict(output_name, iter_result, index)
     if found_name is None:
         return None
     elif found_name != output_name:
         exact_match = misc.find_in_dict(found_name, iter_result0)
         if exact_match == found_name:
             G_LOGGER.verbose("Will not compare {:} with {:}, since the former already has an exact match: {:}".format(
                                 found_name, output_name, exact_match))
             return None # If the found output is being compared against another output already, skip this non-exact match
         G_LOGGER.warning("Output names did not match exactly. Assuming {:} output: {:} "
                         "corresponds to output: {:}".format(
                             iter_result.runner_name, found_name, output_name))
     return [found_name]
Example #15
    def activate_impl(self):
        def make_buffers(engine):
            """
            Creates empty host and device buffers for the specified engine.
            Always uses binding names from Profile 0.
            """
            device_buffers = OrderedDict()
            host_output_buffers = OrderedDict()

            for idx in range(trt_util.get_bindings_per_profile(engine)):
                binding = engine[idx]
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                device_buffers[binding] = cuda.DeviceBuffer(dtype=dtype)
                if not engine.binding_is_input(binding):
                    host_output_buffers[binding] = np.empty(shape=tuple(),
                                                            dtype=dtype)
            G_LOGGER.extra_verbose(
                "Created device buffers: {:}".format(device_buffers))
            return device_buffers, host_output_buffers

        engine_or_context, owning = misc.try_call(self._engine_or_context)

        self.engine, self.owns_engine = None, False
        self.context, self.owns_context = None, False

        if isinstance(engine_or_context, trt.ICudaEngine):
            self.engine = engine_or_context
            self.owns_engine = owning
            self.context = self.engine.create_execution_context()
            if not self.context:
                G_LOGGER.critical(
                    "Invalid Context. See error log for details.")
        elif isinstance(engine_or_context, trt.IExecutionContext):
            self.context = engine_or_context
            self.owns_context = owning
        else:
            G_LOGGER.critical(
                "Invalid Engine or Context. Please ensure the engine was built correctly. See error log for details."
            )

        if not owning:
            G_LOGGER.verbose(
                "Object was provided directly instead of via a Callable. This runner will not assume ownership. "
                "Please ensure it is freed.")

        self.device_buffers, self.host_output_buffers = make_buffers(
            self.context.engine)
        self.stream = cuda.Stream()
Example #16
    def activate_impl(self):
        # If engine is a callable, then we own the engine
        self.engine, self.owning = misc.try_call(self._engine)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if not self.owning:
            G_LOGGER.verbose(
                "Engine was provided directly instead of via a Callable. This runner will not assume ownership. "
                "Please ensure the engine is freed.")

        self.buffers = Buffers.from_engine(self.engine)
        self.stream = cuda.Stream()

        self.context = self.engine.create_execution_context()
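
A hedged sketch of the usage these activate_impl implementations support: the runner is typically used as a context manager, so activation (engine/context creation, buffer allocation) happens on entry and resources are freed on exit. The ONNX path and input name are placeholders:

import numpy as np
from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner

build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"))  # placeholder path

# Passing a loader (a callable) lets the runner own the engine and free it on exit.
with TrtRunner(build_engine) as runner:
    outputs = runner.infer(feed_dict={"input": np.ones((1, 3, 224, 224), dtype=np.float32)})
    print(list(outputs.keys()))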
Example #17
    def set_input_metadata(self, input_metadata):
        """
        Set the input metadata for the data loader.

        Args:
            input_metadata (TensorMetadata):
                    Input Metadata, including shape and type information. The cache may attempt to transform inputs to
                    match the specified input_metadata when data already in the cache does not exactly match.
        """
        self.input_metadata = input_metadata
        with contextlib.suppress(AttributeError):
            self.data_loader.input_metadata = input_metadata
        if not self.cache:
            G_LOGGER.verbose("Loading inputs from data loader")
            self.cache = list(self.data_loader)
            if not self.cache:
                G_LOGGER.warning("Data loader did not yield any input data.")
Example #18
    def __call__(self):
        """
        Creates a TensorFlow session.

        Returns:
            tf.Session: The TensorFlow session.
        """
        config, _ = misc.try_call(self.config)
        (graph, output_names), _ = misc.try_call(self.graph)

        with graph.as_default() as graph, tf.compat.v1.Session(
                graph=graph, config=config).as_default() as sess:
            G_LOGGER.verbose(
                "Using TensorFlow outputs: {:}".format(output_names))
            G_LOGGER.extra_verbose(
                "Initializing variables in TensorFlow Graph")
            sess.run(tf.compat.v1.initializers.global_variables())
            return sess, output_names
Example #19
 def add_to_script(self, script, suffix=None):
     G_LOGGER.verbose(
         "Attempting to load as a TensorFlow model, using TF2ONNX to convert to ONNX. "
         "If this is not correct, please specify --model-type",
         mode=LogMode.ONCE)
     script.add_import(imports=["OnnxFromTfGraph"],
                       frm="polygraphy.backend.onnx")
     loader_str = Script.invoke("OnnxFromTfGraph",
                                self.tf_loader_args.add_to_script(
                                    script,
                                    disable_outputs=True,
                                    suffix=suffix),
                                opset=self.opset,
                                fold_constant=self.fold_constant)
     loader_name = script.add_loader(loader_str,
                                     "export_onnx_from_tf",
                                     suffix=suffix)
     return loader_name
Example #20
    def __call__(self):
        """
        Creates a TensorFlow config.

        Returns:
            tf.ConfigProto: The TensorFlow config.
        """

        # Session configuration
        gpu_options = tf.compat.v1.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_memory_fraction,
            allow_growth=self.allow_growth)
        config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        if self.use_xla:
            config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        G_LOGGER.verbose("Using gpu memory fraction: {:}, XLA: {:}".format(
            self.gpu_memory_fraction, self.use_xla))
        return config
Example #21
    def __call__(self):
        """
        Modifies an ONNX model.

        Returns:
            onnx.ModelProto: The modified ONNX model.
        """
        model, _ = misc.try_call(self._model)

        if self.do_shape_inference:
            model = onnx_util.infer_shapes(model)

        if self.outputs == constants.MARK_ALL:
            G_LOGGER.verbose("Marking all ONNX tensors as outputs")
            model = onnx_util.mark_layerwise(model)
        elif self.outputs is not None:
            model = onnx_util.mark_outputs(model, self.outputs)

        if self.exclude_outputs is not None:
            model = onnx_util.unmark_outputs(model, self.exclude_outputs)

        return onnx_util.check_model(model)
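
A hedged usage sketch for this loader; the ModifyOnnx and OnnxFromPath names and import paths are assumptions based on the polygraphy.backend.onnx module, and the model path is a placeholder:

from polygraphy.backend.onnx import ModifyOnnx, OnnxFromPath
from polygraphy.common import constants

# Mark every tensor as an output (layerwise mode), e.g. for per-layer comparisons.
load_model = ModifyOnnx(OnnxFromPath("model.onnx"), outputs=constants.MARK_ALL)  # placeholder path
model = load_model()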
Example #22
    def __call__(self):
        """
        Loads a TensorFlow model from a checkpoint.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        # If `name` is not provided, this expects that the directory contains a `checkpoint` file with the contents:
        #
        # model_checkpoint_path: "model"
        # all_model_checkpoint_paths: "model"
        #
        # where "model" is the checkpoint name
        if self.name is None:
            G_LOGGER.verbose(
                "Checkpoint name was not explicitly provided, searching for `checkpoint` file"
            )
            checkpoint = tf.train.get_checkpoint_state(self.dir)
            if checkpoint is None:
                ckpt_file_contents = '\nmodel_checkpoint_path: "model"\nall_model_checkpoint_paths: "model"\n'
                G_LOGGER.critical(
                    "Checkpoint directory: {:} does not contain a `checkpoint` file, and the checkpoint name was"
                    "not provided. Please either create a checkpoint file with the contents:\n{:}"
                    "\nWhere `model` is the name of the checkpoint, or explicitly provide the name with"
                    "--ckpt, not including file extensions".format(
                        self.dir, ckpt_file_contents))
            input_checkpoint = checkpoint.model_checkpoint_path
        else:
            input_checkpoint = os.path.join(self.dir, self.name)

        meta_file = input_checkpoint + '.meta'
        with tf.Graph().as_default() as graph, tf.compat.v1.Session(
                graph=graph).as_default() as sess:
            saver = tf.compat.v1.train.import_meta_graph(meta_file,
                                                         clear_devices=True)
            saver.restore(sess, input_checkpoint)
            return graph, tf_util.get_graph_output_names(graph)
Example #23
    def infer(self, feed_dict):
        def is_dynamic_shape_input(binding):
            try:
                self.engine.get_profile_shape_input(0, binding)
                return True
            except RuntimeError:
                return False

        start_binding, end_binding = trt_util.get_active_profile_bindings(
            self.engine, self.context)
        for name, inp in feed_dict.items():
            binding = start_binding + self.engine[name]
            shape = inp.shape
            # Only set shapes if required.
            # get_shape/get_binding_shape will return what a shape input/data input is currently set to.
            if is_dynamic_shape_input(binding):
                G_LOGGER.verbose(
                    "Setting shape binding: {:} (index: {:}) to: {:}".format(
                        name, binding, inp))
                if tuple(self.context.get_shape(binding)) != tuple(inp):
                    self.context.set_shape_input(binding, inp)

            elif misc.is_shape_dynamic(self.engine.get_binding_shape(binding)):
                G_LOGGER.verbose(
                    "Setting binding: {:} (index: {:}) to shape: {:}".format(
                        name, binding, shape))
                if tuple(self.context.get_binding_shape(binding)) != tuple(
                        shape):
                    self.context.set_binding_shape(binding, shape)

        if not self.context.all_binding_shapes_specified:
            G_LOGGER.critical(
                "Some input shapes were not specified.\nNote: Network inputs are: {:}"
                .format(self.get_input_metadata()))
        if not self.context.all_shape_inputs_specified:
            G_LOGGER.critical(
                "Some shape inputs were not specified.\nNote: Network inputs are: {:}"
                .format(self.get_input_metadata()))

        # Inference
        # Need to resize output buffers
        self.buffers.resize(self.engine,
                            self.context,
                            start_binding=start_binding,
                            end_binding=end_binding)

        start = time.time()
        self.buffers.copy_inputs(feed_dict, self.stream)
        # Need to offset bindings in case the active profile is not 0.
        status = self.context.execute_async_v2(
            bindings=[0] * start_binding + self.buffers.bindings(),
            stream_handle=self.stream.address())
        if not status:
            G_LOGGER.critical(
                "Model execution failed. Please see the log messages above for details"
            )

        self.buffers.copy_outputs(self.stream)
        self.stream.synchronize()
        end = time.time()

        self.inference_time = end - start
        return self.buffers.outputs
Example #24
    def add_to_script(self, script, results_name):
        if self.load_results:
            G_LOGGER.verbose("Will load runner results from: {:}".format(
                self.load_results))
            script.add_import(imports=["misc"], frm="polygraphy.util")
            script.append_suffix(
                Script.format_str(
                    "\n# Load results\nfor load_output in {:}:\n{:}{results}.extend(misc.pickle_load(load_output))",
                    self.load_results,
                    Inline(constants.TAB),
                    results=results_name))

        if self.top_k is not None:
            script.add_import(imports=["PostprocessFunc"],
                              frm="polygraphy.comparator")
            script.append_suffix(
                Script.format_str(
                    "\n# Postprocessing - Apply Top-{top_k}\n{results} = Comparator.postprocess({results}, PostprocessFunc.topk_func(k={top_k}))",
                    top_k=self.top_k,
                    results=results_name))

        SUCCESS_VAR_NAME = Inline("success")
        script.append_suffix(
            "\n{success} = True".format(success=SUCCESS_VAR_NAME))

        # Only do comparisons if there's actually something to compare.
        if len(self.runners) > 1 or self.load_results:
            script.append_suffix("# Accuracy Comparison")

            compare_func_str = Script.invoke_if_nondefault(
                "CompareFunc.basic_compare_func",
                rtol=self.rtol,
                atol=self.atol,
                check_shapes=False if self.no_shape_check else None,
                fail_fast=self.fail_fast)
            compare_func = None
            if compare_func_str:
                script.add_import(imports=["CompareFunc"],
                                  frm="polygraphy.comparator")
                compare_func = "compare_func"
                script.append_suffix(
                    Script.format_str("{:} = {:}", Inline(compare_func),
                                      Inline(compare_func_str)))

            compare_accuracy = Script.invoke(
                "Comparator.compare_accuracy",
                results_name,
                compare_func=Inline(compare_func)
                if compare_func is not None else None,
                fail_fast=self.fail_fast)
            script.append_suffix(
                Script.format_str("{success} &= bool({:})\n",
                                  Inline(compare_accuracy),
                                  success=SUCCESS_VAR_NAME))
        if self.validate:
            script.append_suffix(
                "# Validation\n{success} &= Comparator.validate({results})\n".
                format(success=SUCCESS_VAR_NAME, results=results_name))

        return SUCCESS_VAR_NAME
Example #25
    def __getitem__(self, index):
        """
        Randomly generates input data.

        Args:
            index (int):
                    Since this class behaves like an iterable, it takes an index parameter.
                    Generated data is guaranteed to be the same for the same index.

        Returns:
            OrderedDict[str, numpy.ndarray]: A mapping of input names to input numpy buffers.
        """
        if index >= self.iterations:
            raise IndexError()

        G_LOGGER.verbose(
            "Generating data using numpy seed: {:}".format(self.seed + index))
        rng = np.random.RandomState(self.seed + index)

        def get_static_shape(name, shape):
            static_shape = shape
            if misc.is_shape_dynamic(shape):
                static_shape = misc.override_dynamic_shape(shape)
                if static_shape != shape and name not in self.user_input_metadata:
                    if not misc.is_valid_shape_override(static_shape, shape):
                        G_LOGGER.critical(
                            "Input tensor: {:24} | Cannot override original shape: {:} to {:}"
                            .format(name, shape, static_shape))
                    G_LOGGER.warning(
                        "Input tensor: {:24} | Will generate data of shape: {:} (tensor shape is: {:}).\n"
                        "If this is incorrect, please set input_metadata "
                        "or provide a custom data loader.".format(
                            name, static_shape, shape),
                        mode=LogMode.ONCE)
            return static_shape

        # Whether the user provided the values for a shape tensor input,
        # rather than the shape of the input.
        # If the shape is 1D, and has a value equal to the rank of the provided default shape, it is
        # likely to be a shape tensor, and so its value, not shape, should be overridden.
        def is_shape_tensor(name, dtype):
            if name not in self.input_metadata or name not in self.user_input_metadata:
                return False

            _, shape = self.input_metadata[name]
            is_shape = np.issubdtype(dtype, np.integer) and (
                not misc.is_shape_dynamic(shape)) and (len(shape) == 1)

            user_shape = self.user_input_metadata[name][1]
            is_shape &= len(user_shape) == shape[0]
            # Can't have negative values in shapes
            is_shape &= all([elem >= 0 for elem in user_shape])
            return is_shape

        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes"
                    .format(name, buffer),
                    mode=LogMode.ONCE)
            elif np.issubdtype(dtype, np.integer):
                # high is 1 greater than the max int drawn
                buffer = rng.randint(low=self.int_range[0],
                                     high=self.int_range[1] + 1,
                                     size=shape,
                                     dtype=dtype)
            elif np.issubdtype(dtype, np.bool_):
                buffer = rng.randint(low=0, high=2, size=shape).astype(dtype)
            else:
                buffer = (rng.random_sample(size=shape) *
                          (self.float_range[1] - self.float_range[0]) +
                          self.float_range[0]).astype(dtype)

            buffer = np.array(
                buffer
            )  # To handle scalars, since the above functions return a float if shape is ().
            return buffer

        if self.input_metadata is None and self.user_input_metadata is not None:
            self.input_metadata = self.user_input_metadata

        buffers = OrderedDict()
        for name, (dtype, shape) in self.input_metadata.items():
            if name in self.user_input_metadata:
                user_dtype, user_shape = self.user_input_metadata[name]

                dtype = misc.default_value(user_dtype, dtype)

                is_valid_shape_override = user_shape is not None and misc.is_valid_shape_override(
                    user_shape, shape)
                if not is_valid_shape_override and not is_shape_tensor(
                        name, dtype):
                    G_LOGGER.warning(
                        "Input tensor: {:24} | Cannot use provided custom shape: {:}, since this input has "
                        "a static shape: {:}".format(name, user_shape, shape),
                        mode=LogMode.ONCE)
                else:
                    shape = misc.default_value(user_shape, shape)

            static_shape = get_static_shape(name, shape)
            buffers[name] = generate_buffer(name, dtype, shape=static_shape)

        # Warn about unused metadata
        for name in self.user_input_metadata.keys():
            if name not in self.input_metadata:
                msg = "Input tensor: {:24} | Metadata was provided, but the input does not exist in one or more runners.".format(
                    name)
                close_match = misc.find_in_dict(name, self.input_metadata)
                if close_match:
                    msg += "\nMaybe you meant to set: {:}".format(close_match)
                G_LOGGER.warning(msg)

        return buffers
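
As the warnings above suggest, the random data loader can be swapped out entirely; a data loader is just an iterable of feed_dicts mapping input names to NumPy arrays. A minimal sketch with placeholder names and shapes:

import numpy as np

# Any iterable of feed_dicts works, e.g. Comparator.run(runners, data_loader=custom_data).
custom_data = [
    {"input": np.random.rand(1, 3, 224, 224).astype(np.float32)}
    for _ in range(5)
]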
Example #26
    def run(runners,
            data_loader=None,
            warm_up=None,
            use_subprocess=None,
            subprocess_timeout=None,
            subprocess_polling_interval=None,
            save_inputs_path=None):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, numpy.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, numpy.ndarray]]` .

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, omitting this parameter when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.
            save_inputs_path (str):
                    [EXPERIMENTAL] Path at which to save inputs used during inference. This will include all inputs generated by
                    the provided data_loader, and will be saved as a pickled List[Dict[str, numpy.ndarray]].

        Returns:
            RunResults:
                    A mapping of runner names to the results of their inference.
                    The ordering of `runners` is preserved in this mapping.
        """
        warm_up = misc.default_value(warm_up, 0)
        data_loader = misc.default_value(data_loader, DataLoader())
        use_subprocess = misc.default_value(use_subprocess, False)
        subprocess_polling_interval = misc.default_value(
            subprocess_polling_interval, 30)
        loader_cache = DataLoaderCache(data_loader,
                                       save_inputs_path=save_inputs_path)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("Runner: {:40} | Input Metadata: {:}".format(
                    active_runner.name, input_metadata),
                              mode=LogMode.ONCE)
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                index = 0
                iteration_results = []
                output_metadata = TensorMetadata()

                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if index == 0:
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.info(
                        "Runner: {:40} | Output Metadata: {:}".format(
                            active_runner.name, output_metadata),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.finish(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, index + 1))
                return iteration_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            iteration_results = None
            try:
                iteration_results = execute_runner(runner, loader_cache)
            except:
                # Cannot send the exception back, as it is not necessarily pickleable
                import traceback
                G_LOGGER.error(traceback.format_exc())
            misc.try_send_on_queue(runner_queue, iteration_results)
            # After finishing, send the updated loader_cache back.
            misc.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        run_results = RunResults()
        for runner in runners:
            G_LOGGER.start(
                "Runner: {:40} | Activating and starting inference".format(
                    runner.name))
            if use_subprocess:
                runner_queue = Queue()
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                iteration_results = None
                while process.is_alive() and iteration_results is None:
                    try:
                        iteration_results = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert iteration_results is not None
                    run_results.append((runner.name, iteration_results))
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "Runner: {:40} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results.append(
                    (runner.name, execute_runner(runner, loader_cache)))

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
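
Putting the pieces together, a hedged end-to-end sketch of Comparator.run followed by the accuracy comparison that the surrounding tooling performs; the loader and runner names are assumed from polygraphy.backend.trt / polygraphy.comparator, and the model path is a placeholder:

from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner
from polygraphy.comparator import Comparator, CompareFunc

model_path = "model.onnx"  # placeholder
runners = [
    TrtRunner(EngineFromNetwork(NetworkFromOnnxPath(model_path))),                                  # FP32
    TrtRunner(EngineFromNetwork(NetworkFromOnnxPath(model_path), config=CreateConfig(fp16=True))),  # FP16
]
run_results = Comparator.run(runners, warm_up=2)
success = bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.basic_compare_func(atol=1e-3)))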
Example #27
    def parse(self, args):
        def determine_model_type():
            if tools_util.get(args, "model_type") is not None:
                return args.model_type.lower()

            if tools_util.get(args, "model_file") is None:
                return None

            def use_ext(ext_mapping):
                file_ext = os.path.splitext(args.model_file)[-1]
                if file_ext in ext_mapping:
                    return ext_mapping[file_ext]

            runners = misc.default_value(tools_util.get(args, "runners"), [])
            if tools_util.get(args, "ckpt") or os.path.isdir(args.model_file):
                return "ckpt"
            elif "tf" in runners or "trt_legacy" in runners:
                if args.caffe_model:
                    return "caffe"
                ext_mapping = {
                    ".hdf5": "keras",
                    ".uff": "uff",
                    ".prototxt": "caffe",
                    ".onnx": "onnx",
                    ".engine": "engine",
                    ".plan": "engine"
                }
                return use_ext(ext_mapping) or "frozen"
            else:
                # When no framework is provided, some extensions can be ambiguous
                ext_mapping = {
                    ".hdf5": "keras",
                    ".graphdef": "frozen",
                    ".onnx": "onnx",
                    ".uff": "uff",
                    ".engine": "engine",
                    ".plan": "engine"
                }
                model_type = use_ext(ext_mapping)
                if model_type:
                    return model_type

            G_LOGGER.critical(
                "Could not automatically determine model type for: {:}\n"
                "Please explicitly specify the type with the --model-type option"
                .format(args.model_file))

        if tools_util.get(args, "model_file"):
            G_LOGGER.verbose("Model: {:}".format(args.model_file))
            if not os.path.exists(args.model_file):
                G_LOGGER.warning("Model path does not exist: {:}".format(
                    args.model_file))
            args.model_file = os.path.abspath(args.model_file)

        if tools_util.get(args, "input_shapes"):
            self.input_shapes = tools_util.parse_meta(
                tools_util.get(args, "input_shapes"),
                includes_dtype=False)  # TensorMetadata
        else:
            self.input_shapes = TensorMetadata()

        self.model_file = args.model_file
        self.model_type = misc.default_value(self._model_type,
                                             determine_model_type())
Example #28
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.verbose("Runner: {:40} | Input Metadata:\n{:}".format(
                    active_runner.name, misc.indent_block(input_metadata)))
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.info(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                total_time = 0
                run_results = []
                for feed_dict in loader_cache:
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    run_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if len(run_results) == 1:
                        output_metadata = TensorMetadata()
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.verbose(
                        "Runner: {:40} | Output Metadata:\n{:}".format(
                            active_runner.name,
                            misc.indent_block(output_metadata)),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.info(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, len(run_results)))
                return run_results
Example #29
    def add_to_script(self, script, disable_outputs=None, suffix=None):
        if disable_outputs:
            outputs = None
        else:
            outputs = tools_util.get_outputs_for_script(script, self.outputs)

        model_file = self.model_args.model_file
        model_type = self.model_args.model_type

        if model_type == "ckpt":
            G_LOGGER.verbose(
                "Loading a TensorFlow checkpoint. Please ensure you are not using the --use-subprocess flag"
                .format(model_file),
                mode=LogMode.ONCE)
            script.add_import(imports=["GraphFromCkpt"],
                              frm="polygraphy.backend.tf")
            loader_id = "load_ckpt"
            loader_str = Script.invoke("GraphFromCkpt", model_file, self.ckpt)
        elif model_type == "keras":
            script.add_import(imports=["GraphFromKeras"],
                              frm="polygraphy.backend.tf")
            loader_id = "load_keras"
            loader_str = Script.invoke("GraphFromKeras", model_file)
        else:
            script.add_import(imports=["GraphFromFrozen"],
                              frm="polygraphy.backend.tf")
            G_LOGGER.verbose(
                "Attempting to load as a frozen graph. If this is not correct, please specify --model-type",
                mode=LogMode.ONCE)
            loader_id = "load_frozen"
            loader_str = Script.invoke("GraphFromFrozen", model_file)

        loader_name = script.add_loader(loader_str, loader_id, suffix=suffix)

        if self.freeze_graph:
            script.add_import(imports=["OptimizeGraph"],
                              frm="polygraphy.backend.tf")
            loader_name = script.add_loader(Script.invoke(
                "OptimizeGraph", loader_name),
                                            "optimize_graph",
                                            suffix=suffix)
        if self.tftrt:
            script.add_import(imports=["UseTfTrt"],
                              frm="polygraphy.backend.tf")
            loader_str = Script.invoke(
                "UseTfTrt",
                loader_name,
                max_workspace_size=self.trt_loader_args.workspace,
                fp16=self.trt_loader_args.fp16,
                int8=self.trt_loader_args.int8,
                max_batch_size=self.trt_legacy_args.batch_size,
                is_dynamic_op=self.dynamic_op,
                minimum_segment_size=self.minimum_segment_size)
            loader_name = script.add_loader(loader_str,
                                            "use_tftrt",
                                            suffix=suffix)

        MODIFY_TF = "ModifyGraph"
        modify_tf_str = Script.invoke(MODIFY_TF, loader_name, outputs=outputs)
        if modify_tf_str != Script.invoke(MODIFY_TF, loader_name):
            script.add_import(imports=[MODIFY_TF], frm="polygraphy.backend.tf")
            loader_name = script.add_loader(modify_tf_str, "modify_tf")

        engine_dir = None
        if self.tftrt:
            engine_dir = self.trt_runner_args.save_engine

        WRITE_TF = "SaveGraph"
        write_tf_str = Script.invoke(WRITE_TF,
                                     loader_name,
                                     path=self.save_pb,
                                     tensorboard_dir=self.save_tensorboard,
                                     engine_dir=engine_dir)
        if write_tf_str != Script.invoke(WRITE_TF, loader_name):
            script.add_import(imports=[WRITE_TF], frm="polygraphy.backend.tf")
            loader_name = script.add_loader(write_tf_str, "save_tf")

        return loader_name
Example #30
def get_output_metadata(graph, layerwise=False):
    graphdef = graph.as_graph_def()

    node_output_map = map_node_outputs(graphdef)

    def is_output_node(node):
        # Make sure that we're not using hanging nodes as outputs - must have at least one input.
        if len(node_output_map[node.name]) != 0 or len(node.input) == 0:
            return False

        # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
        EXCLUDE_OPS = [
            "Switch",
            "FusedBatchNorm",
            "Assert",
            "NextIteration",
            "Enter",
            "LoopCond",
            "Exit",
            "Print",
            "Assign",
            "NoOp",
            "ReadVariableOp",
            "VarIsInitializedOp",
            "Const"
            ]

        # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
        EXCLUDE_NAMESPACES = ["while", "Assert"]

        if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any([ns in node.name for ns in EXCLUDE_NAMESPACES]):
            G_LOGGER.extra_verbose("Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                             "(Note: excluded namespaces: {:})".format(node.name, node.op, EXCLUDE_NAMESPACES))
            return False

        return True

    # For layerwise mode, every layer becomes an output.
    if layerwise:
        output_nodes = list(graphdef.node)
        G_LOGGER.verbose("Running in layerwise mode. Marking {:} layers as potential outputs".format(len(output_nodes)))
    else:
        output_nodes = [node for node in graphdef.node if is_output_node(node)]
    G_LOGGER.extra_verbose("Found likely output nodes: {:}".format(output_nodes))

    output_tensors = []
    for node in output_nodes:

        tensor_name = node.name + ":0"
        try:
            tensor = graph.get_tensor_by_name(tensor_name)
            output_tensors.append(tensor)
        except KeyError:
            G_LOGGER.warning("Could not import: {:}. Skipping.".format(tensor_name))
    if len(output_tensors) != len(output_nodes):
        G_LOGGER.warning("Excluded {:} ops that don't seem like outputs. Use -vv/--super-verbose, or set "
                        "logging verbosity to EXTRA_VERBOSE to view them.".format(len(output_nodes) - len(output_tensors)))

    G_LOGGER.extra_verbose("Found output op types in graph: {:}".format(set([tensor.op.type for tensor in output_tensors])))
    G_LOGGER.verbose("Retrieved TensorFlow output_tensors: {:}".format(output_tensors))
    return get_tensor_metadata(output_tensors)