Example #1
    def __call__(self):
        """
        Converts a TensorFlow model into ONNX.

        Returns:
            onnx.ModelProto: The ONNX model.
        """
        import tensorflow as tf
        import tf2onnx
        from polygraphy.backend.tf import util as tf_util

        misc.log_module_info(tf2onnx)

        (graph, output_names), _ = misc.try_call(self._graph)
        input_names = list(tf_util.get_input_metadata(graph).keys())

        graphdef = graph.as_graph_def()
        if self.optimize:
            if self.fold_constant:
                G_LOGGER.info("Folding constants in graph using tf2onnx.tfonnx.tf_optimize")
            graphdef = tf2onnx.tfonnx.tf_optimize(input_names, output_names, graphdef, fold_constant=self.fold_constant)

        with tf.Graph().as_default() as graph, tf.compat.v1.Session(graph=graph) as sess:
            tf.import_graph_def(graphdef, name="")

            onnx_graph = tf2onnx.tfonnx.process_tf_graph(graph, input_names=input_names, output_names=output_names, opset=self.opset)
            if self.optimize:
                onnx_graph = tf2onnx.optimizer.optimize_graph(onnx_graph)
            return onnx_util.check_model(onnx_graph.make_model("model"))
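
This loader follows Polygraphy's lazy-loader convention: constructing it is cheap, and the TF-to-ONNX conversion only runs when the object is called. A minimal usage sketch, assuming this __call__ belongs to Polygraphy's OnnxFromTfGraph loader and that GraphFromFrozen is the companion TensorFlow loader ("model.pb" is a placeholder path):

    from polygraphy.backend.onnx import OnnxFromTfGraph
    from polygraphy.backend.tf import GraphFromFrozen

    # Construction is lazy; nothing is loaded or converted yet.
    load_onnx = OnnxFromTfGraph(GraphFromFrozen("model.pb"), opset=11)
    onnx_model = load_onnx()  # Invokes the __call__ shown above.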
Example #2
        def write_calibration_cache(self, cache):
            self.cache_contents = cache.tobytes()
            self.has_cached_scales = True

            if self._cache is None:
                return

            try:
                if self._cache.seekable():
                    self._cache.seek(0)
                bytes_written = self._cache.write(self.cache_contents)
                if bytes_written != len(self.cache_contents):
                    G_LOGGER.warning(
                        "Could not write entire cache. Note: cache contains {:} bytes, but only "
                        "{:} bytes were written".format(
                            len(self.cache_contents), bytes_written))
            except AttributeError:
                G_LOGGER.info("Writing calibration cache to: {:}".format(
                    self._cache))
                with open(self._cache, "wb") as f:
                    f.write(self.cache_contents)
            except Exception:
                # Cache is not writable; keep the contents in memory only.
                return
            else:
                self._cache.flush()
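
The writer supports both a file-like object and a filesystem path: the file-object protocol (seekable/write/flush) is tried first, and an AttributeError triggers the fallback to open(). A small self-contained sketch of the same dual-dispatch pattern (the names here are illustrative, not Polygraphy's):

    import io
    import os
    import tempfile

    def write_cache(cache, contents):
        # Treat `cache` as a file-like object first; fall back to treating it
        # as a path when the file-object protocol is missing.
        try:
            if cache.seekable():
                cache.seek(0)
            cache.write(contents)
            cache.flush()
        except AttributeError:
            with open(cache, "wb") as f:
                f.write(contents)

    write_cache(io.BytesIO(), b"scales")  # file-like object
    write_cache(os.path.join(tempfile.gettempdir(), "calib.cache"), b"scales")  # path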
Example #3
    def __call__(self):
        """
        Writes out artifacts from a TensorFlow Graph.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, outputs), _ = misc.try_call(self._graph)

        misc.lazy_write(
            contents=lambda: graph.as_graph_def().SerializeToString(),
            path=self.path)
        if self.tensorboard_dir:
            G_LOGGER.info("Writing tensorboard events to {:}".format(
                self.tensorboard_dir))
            train_writer = tf.compat.v1.summary.FileWriter(
                self.tensorboard_dir)
            train_writer.add_graph(graph)

        if self.engine_dir is not None:
            graphdef = graph.as_graph_def()
            segment_number = 0
            for node in graphdef.node:
                if node.op == "TRTEngineOp":
                    engine = node.attr["serialized_segment"].s
                    # The outer check already guarantees engine_dir is set.
                    misc.lazy_write(
                        contents=engine,
                        path=os.path.join(
                            self.engine_dir,
                            "segment-{:}".format(segment_number)))
                    segment_number += 1

        return graph, outputs
Example #4
        def generate_buffer(name, dtype, shape):
            if is_shape_tensor(name, dtype):
                buffer = np.array(shape, dtype=dtype)
                G_LOGGER.info(
                    "Assuming {:} is a shape tensor. Setting input values to: {:}. If this is not correct, "
                    "please set it correctly in 'input_metadata' or by providing --input-shapes"
                    .format(name, buffer),
                    mode=LogMode.ONCE)
            elif np.issubdtype(dtype, np.integer):
                # high is 1 greater than the max int drawn
                buffer = rng.randint(low=self.int_range[0],
                                     high=self.int_range[1] + 1,
                                     size=shape,
                                     dtype=dtype)
            elif np.issubdtype(dtype, np.bool_):
                buffer = rng.randint(low=0, high=2, size=shape).astype(dtype)
            else:
                buffer = (rng.random_sample(size=shape) *
                          (self.float_range[1] - self.float_range[0]) +
                          self.float_range[0]).astype(dtype)

            buffer = np.array(buffer)  # Re-wrap scalars: the samplers above return a Python float when shape is ().
            return buffer
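
For reference, the three branches map onto NumPy's legacy RandomState API: integers come from randint with high set to max + 1 (so the range is inclusive), booleans from randint over {0, 1}, and floats from random_sample rescaled from [0, 1) into [float_min, float_max). A self-contained sketch using the default ranges from Example #11 ((1, 25) and (-1.0, 1.0)):

    import numpy as np

    rng = np.random.RandomState(1)  # a stand-in for the seeded RNG used above
    shape = (2, 3)

    ints = rng.randint(low=1, high=25 + 1, size=shape, dtype=np.int32)  # [1, 25], inclusive
    bools = rng.randint(low=0, high=2, size=shape).astype(np.bool_)     # {False, True}
    floats = (rng.random_sample(size=shape) * (1.0 - (-1.0)) + (-1.0)).astype(np.float32)  # [-1.0, 1.0)
    scalar = np.array(rng.random_sample(size=()))  # np.array() re-wraps the Python float returned for shape ()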
Example #5
        def fix_graph(graph, model):
            """
            Fix the graph so it is valid ONNX.
            """

            def fix_tensor_metadata(tensors, fix_shape=True):
                for tensor in tensors:
                    if not tensor.shape and fix_shape:
                        tensor.shape = layerwise(model)[tensor.name].shape
                    if not tensor.dtype:
                        tensor.dtype = layerwise(model)[tensor.name].dtype

            fix_tensor_metadata(graph.inputs)
            fix_tensor_metadata(graph.outputs, fix_shape=False)

            # If we're marking inputs, there may be cases where some other inputs are required - for
            # example, if the model is branchy. If, after cleanup(), there are any Variable tensors in
            # the graph without inputs, we'll replace them with constants and fold them away.
            tensor_map = graph.tensors()
            needs_const_fold = False
            for tensor in tensor_map.values():
                if isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs:
                    needs_const_fold = True
                    G_LOGGER.info("Freezing model input: {:}".format(tensor))
                    tensor.to_constant(layerwise(model, include_data=True)[tensor.name])

            if needs_const_fold:
                G_LOGGER.info("Folding constants to remove extraneous subgraphs")
                graph.fold_constants().cleanup()

            return graph
Example #6
    def __call__(self):
        uff_model, input_names, input_shapes, output_names = self.uff_loader()

        builder = trt.Builder(TRT_LOGGER)
        network = builder.create_network()
        parser = trt.UffParser()
        # Input names should come from the converter, as a preprocessing script may have been applied to the frozen model.
        for name, shape in zip(input_names, input_shapes):
            # Default order is NCHW, only set to NHWC if we're reasonably certain that it is.
            input_order = self.uff_order
            if not self.uff_order:
                input_order = trt.UffInputOrder.NCHW
                if FormatManager.determine_format(shape) == DataFormat.NHWC:
                    input_order = trt.UffInputOrder.NHWC
            shape = shape[1:]  # Drop the batch dimension; UFF inputs are registered with per-sample shapes.
            G_LOGGER.verbose(
                "Registering UFF input: {:} with shape: {:} and input order: {:}"
                .format(name, shape, input_order))
            parser.register_input(name, shape, input_order)

        if output_names and output_names != constants.MARK_ALL:
            for name in output_names:
                G_LOGGER.verbose("Registering UFF output: " + str(name))
                parser.register_output(name)

        G_LOGGER.info(
            "Parsing UFF model with inputs: {:} and outputs: {:}".format(
                input_names, output_names))
        success = parser.parse_buffer(uff_model, network)
        if not success:
            G_LOGGER.critical("Could not parse UFF correctly")
        return builder, network, parser, input_shapes[0][0]  # The last element is the model batch size, taken from the first input's shape.
Example #7
    def __call__(self):
        """
        Builds a TensorRT engine.

        Returns:
            trt.ICudaEngine: The engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owning = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            if owning:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                               "Please ensure that they are freed.".format(provided))

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            config, _ = misc.try_call(self._config, builder, network)
            G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))
            engine = builder.build_engine(network, config)
            if not engine:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")

            if hasattr(config.int8_calibrator, "free"):
                config.int8_calibrator.free()

            return engine
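
In Polygraphy this builder loader is normally composed with a network loader rather than called by hand. A hedged usage sketch, assuming the standard EngineFromNetwork / NetworkFromOnnxPath / CreateConfig loaders ("model.onnx" is a placeholder):

    from polygraphy.backend.trt import CreateConfig, EngineFromNetwork, NetworkFromOnnxPath

    # Passing a callable means this loader owns the builder/network/parser and
    # frees them via the ExitStack shown above.
    build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"), config=CreateConfig(fp16=True))
    engine = build_engine()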
Example #8
 def layerwise(model, include_data=False):
     nonlocal _layerwise_outputs, _layerwise_meta
     if _layerwise_outputs is None or _layerwise_meta is None:
         G_LOGGER.info(
             "Running inference with ONNX-Runtime to determine metadata for intermediate tensors.\n"
             "This will cause intermediate models to have static shapes."
         )
         _layerwise_outputs, _layerwise_meta = self.arg_groups[OnnxShapeInferenceArgs].fallback_inference(model)
     return _layerwise_outputs if include_data else _layerwise_meta
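
The nonlocal pair implements lazy memoization: the expensive fallback inference runs at most once, and both return flavors (full outputs vs. metadata only) are served from the cached results. The same pattern in isolation, with illustrative names:

    def make_layerwise(expensive_inference):
        _outputs = None
        _meta = None

        def layerwise(model, include_data=False):
            nonlocal _outputs, _meta
            if _outputs is None or _meta is None:
                _outputs, _meta = expensive_inference(model)  # runs at most once
            return _outputs if include_data else _meta

        return layerwise

    layerwise = make_layerwise(lambda model: ({"t": [1, 2]}, {"t": "meta"}))
    layerwise("m")                     # triggers the expensive call
    layerwise("m", include_data=True)  # served from the cache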
Example #9
    def __call__(self, builder, network):
        """
        Creates a TensorRT IBuilderConfig that can be used by the EngineFromNetwork.

        Args:
            builder (trt.Builder):
                    The TensorRT builder to use to create the configuration.
            network (trt.INetworkDefinition):
                    The TensorRT network for which to create the config. The network is used to
                    automatically create a default optimization profile if none are provided.

        Returns:
            trt.IBuilderConfig: The TensorRT builder configuration.
        """
        with misc.FreeOnException([builder.create_builder_config()]) as (config,):
            calibration_profile = None
            for profile in self.profiles:
                calibration_profile = trt_util.build_profile(
                    builder, network, profile)
                config.add_optimization_profile(calibration_profile)
            if not self.profiles:
                calibration_profile = trt_util.build_default_profile(
                    builder, network)
                config.add_optimization_profile(calibration_profile)

            if self.profiles:
                G_LOGGER.info("Configuring with profiles: {:}".format(
                    self.profiles))

            config.max_workspace_size = int(self.max_workspace_size)

            if self.strict_types:
                config.set_flag(trt.BuilderFlag.STRICT_TYPES)
            if not self.tf32:
                with contextlib.suppress(AttributeError):
                    config.clear_flag(trt.BuilderFlag.TF32)
            if self.fp16:
                config.set_flag(trt.BuilderFlag.FP16)
            if self.int8:
                config.set_flag(trt.BuilderFlag.INT8)
                if not network.has_explicit_precision:
                    if self.calibrator is not None:
                        input_metadata = trt_util.get_input_metadata_from_profile(
                            calibration_profile, network)
                        with contextlib.suppress(AttributeError):
                            self.calibrator.reset(input_metadata)
                        config.int8_calibrator = self.calibrator
                        with contextlib.suppress(AttributeError):
                            config.set_calibration_profile(calibration_profile)
                    else:
                        G_LOGGER.warning(
                            "Network does not have explicit precision and no calibrator was provided. Please ensure "
                            "that tensors in the network have dynamic ranges set, or provide a calibrator in order to use int8 mode."
                        )
            return config
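
A hedged usage sketch of this config loader; the parameter names are inferred from the attributes used above, and Profile/Calibrator are assumed to be Polygraphy's helpers. The loader is usually handed to EngineFromNetwork, which calls it with the builder and network:

    import numpy as np
    from polygraphy.backend.trt import Calibrator, CreateConfig, Profile

    profile = Profile()
    profile.add("input", min=(1, 3, 224, 224), opt=(4, 3, 224, 224), max=(8, 3, 224, 224))
    data_loader = [{"input": np.ones((4, 3, 224, 224), dtype=np.float32)}]  # placeholder calibration data

    create_config = CreateConfig(
        max_workspace_size=1 << 30,
        fp16=True,
        int8=True,
        profiles=[profile],
        calibrator=Calibrator(data_loader=data_loader),
    )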
Example #10
 def run(self, command):
     G_LOGGER.info("Running: {:} from cwd: {:}".format(command, self.path))
     env = copy.copy(os.environ)
     env["PYTHONPATH"] = ROOT_DIR
     env["PATH"] = os.path.join(ROOT_DIR, "bin") + os.path.pathsep + env["PATH"]
     # Remove whitespace args and escaped newlines
     command = [arg for arg in command.strip().split(" ") if arg.strip() and arg != "\\\n"]
     status = sp.run(command, cwd=self.path, env=env, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
     assert status.returncode == 0, status.stdout + "\n" + status.stderr
     return status
Example #11
    def __init__(self,
                 seed=None,
                 iterations=None,
                 input_metadata=None,
                 int_range=None,
                 float_range=None):
        """
        Args:
            seed (int):
                    The seed to use when generating random inputs.
                    Defaults to ``util.constants.DEFAULT_SEED``.
            iterations (int):
                    The number of iterations for which to supply data.
                    Defaults to 1.
            input_metadata (TensorMetadata):
                    A mapping of input names to their corresponding shapes and data types.
                    This will be used to determine what shapes to supply for inputs with dynamic shape, as
                    well as to set the data type of the generated inputs.
                    If either dtype or shape are None, then the value will be automatically determined.
                    For input shape tensors, i.e. inputs whose *value* describes a shape in the model, the
                    provided shape will be used to populate the values of the inputs, rather than to determine
                    their shape.
            int_range (Tuple[int]):
                    A tuple containing exactly 2 integers, indicating the minimum and maximum integer values (inclusive)
                    the data loader should generate. If either value in the tuple is None, the default will be used
                    for that value.
                    If None is provided instead of a tuple, then the default values will be used for both the
                    minimum and maximum.
            float_range (Tuple[float]):
                    A tuple containing exactly 2 floats, indicating the minimum and maximum float values (inclusive)
                    the data loader should generate. If either value in the tuple is None, the default will be used
                    for that value.
                    If None is provided instead of a tuple, then the default values will be used for both the
                    minimum and maximum.
        """
        def default_tuple(tup, default):
            if tup is None:
                return default
            new_tup = []
            for elem, default_elem in zip(tup, default):
                new_tup.append(misc.default_value(elem, default_elem))
            return tuple(new_tup)

        self.seed = misc.default_value(seed, DEFAULT_SEED)
        self.iterations = misc.default_value(iterations, 1)
        self.user_input_metadata = misc.default_value(input_metadata, {})
        self.int_range = default_tuple(int_range, (1, 25))
        self.float_range = default_tuple(float_range, (-1.0, 1.0))
        self.input_metadata = None

        if self.user_input_metadata:
            G_LOGGER.info(
                "Will generate inference input data according to provided TensorMetadata: {}"
                .format(self.user_input_metadata))
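
A hedged usage sketch, assuming this is Polygraphy's DataLoader and that TensorMetadata lives in polygraphy.common; the input name and shape are placeholders:

    import numpy as np
    from polygraphy.common import TensorMetadata
    from polygraphy.comparator import DataLoader

    meta = TensorMetadata().add("x", dtype=np.float32, shape=(1, 3, 224, 224))
    loader = DataLoader(seed=1, input_metadata=meta, int_range=(0, 10), float_range=(-1.0, 1.0))
    feed_dict = loader[0]  # synthetic inputs for the first iteration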
Example #12
        def mark_io(graph, attr, tensors, filter_const=True):
            if filter_const:
                tensors = [t for t in tensors if not isinstance(t, gs.Constant)]

            if not tensors:
                G_LOGGER.warning(
                    "No non-constant tensors are available to mark. "
                    "Try folding constants in the model with `polygraphy surgeon sanitize --fold-constants`"
                )

            setattr(graph, attr, tensors)
            G_LOGGER.info("Marking model {attr}: {:}".format(getattr(graph, attr), attr=attr))
            return graph
Example #13
    def __init__(self,
                 network_loader=None,
                 max_workspace_size=None,
                 max_batch_size=None,
                 fp16=None,
                 tf32=None,
                 load_engine=None,
                 save_engine=None,
                 layerwise=False,
                 plugins=None,
                 name=None):
        """
        Creates a runner that manages a single TensorRT engine.

        Args:
            network_loader (BaseModelLoader):
                    A loader that returns a TRT builder, network, parser and input shapes.
            max_workspace_size (int): The maximum workspace size.
            max_batch_size (int): The maximum batch size.
            fp16 (bool): Whether to run in fp16 mode.
            tf32 (bool): Whether to allow TF32 kernels. Defaults to False.
            load_engine (str): A path from which to deserialize a pre-built engine instead of building one.
            save_engine (str): A path to which to serialize the built engine.
            layerwise (bool): Whether to retrieve the outputs of every layer in the network.
            plugins (List[str]): Paths to plugin libraries to load before engine deserialization or building.
            name (str):
                    The human-readable name prefix to use for this runner.
                    A runner count and timestamp will be appended to this prefix.
        """
        G_LOGGER.warning(
            "TrtLegacyRunner is deprecated, and will be removed in a future release"
        )
        # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization.
        if plugins:
            import ctypes
            for plugin in plugins:
                path = os.path.abspath(plugin)
                G_LOGGER.info("Loading plugin library: {:}".format(path))
                ctypes.CDLL(path)

        # Choose a unique name for this runner.
        super().__init__(name=name, prefix="trt-legacy-runner")

        # Save parameters for activate and deactivate.
        self.network_loader = network_loader
        self.max_workspace_size = misc.default_value(max_workspace_size,
                                                     1 << 24)
        self.fp16 = misc.default_value(fp16, False)
        self.tf32 = misc.default_value(tf32, False)
        self.load_engine = load_engine

        self.engine_path = save_engine

        self.layerwise = layerwise
        self.max_batch_size = max_batch_size
Example #14
            def load_from_cache():
                if self._cache is None:
                    return None

                try:
                    if self._cache.seekable():
                        self._cache.seek(0)
                    return self._cache.read()
                except AttributeError:
                    if os.path.exists(self._cache):
                        G_LOGGER.info("Reading calibration cache from: {:}".format(self._cache), mode=LogMode.ONCE)
                        with open(self._cache, "rb") as f:
                            return f.read()
                except Exception:
                    # Cache is not readable; fall back to calibrating from scratch.
                    return None
Example #15
    def __call__(self):
        """
        Optimizes a TensorFlow model using TF-TRT.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        from tensorflow.contrib import tensorrt as tf_trt

        (graph, output_names), _ = misc.try_call(self._graph)

        precision_mode = "FP16" if self.fp16 else "FP32"
        precision_mode = "INT8" if self.int8 else precision_mode

        G_LOGGER.info(
            "For TF-TRT, using outputs={:}, max_workspace_size_bytes={:}, max_batch_size={:}, "
            "minimum_segment_size={:}, is_dynamic_op={:}, precision_mode={:}".
            format(output_names, self.max_workspace_size, self.max_batch_size,
                   self.minimum_segment_size, self.is_dynamic_op,
                   precision_mode))

        graphdef = tf_trt.create_inference_graph(
            graph.as_graph_def(),
            outputs=output_names,
            max_workspace_size_bytes=self.max_workspace_size,
            max_batch_size=self.max_batch_size,
            minimum_segment_size=self.minimum_segment_size,
            is_dynamic_op=self.is_dynamic_op,
            precision_mode=precision_mode)

        num_engines = sum(1 for node in graphdef.node if node.op == "TRTEngineOp")
        G_LOGGER.info(
            "Found {:} engines in TF-TRT graph".format(num_engines))

        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graphdef, name="")
            return graph, tf_util.get_graph_output_names(graph)
Example #16
        def compare_output(iter_result0, iter_result1):
            """
            Compare the outputs of two runners from a single iteration.

            This function will always iterate over the output names of the first IterationResult,
                and attempt to find corresponding output names in the second.
            If no corresponding output name is found, the output is skipped.
            If all output names are skipped, then this function raises an error.

            Args:
                iter_result0 (IterationResult): The result of the first runner.
                iter_result1 (IterationResult): The result of the second runner.

            Returns:
                OrderedDict[str, OutputCompareResult]:
                        The name of the outputs compared, derived from the first IterationResult,
                        and whether they matched. If an output name is not found, it is omitted from this dictionary.

            Raises:
                PolygraphyException: If all output names are skipped, and thus no outputs are compared.
            """
            # Returns whether the outputs match
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol):
                def compute_max(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amax(buffer)

                # Returns index of max value
                def compute_argmax(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmax(buffer), buffer.shape)

                def compute_min(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amin(buffer)

                # Returns index of min value
                def compute_argmin(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmin(buffer), buffer.shape)

                def compute_mean(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.mean(buffer)


                def compute_required():
                    # The purpose of this function is to determine the minimum tolerances such that
                    # the outputs would be considered a match.
                    # The NumPy criterion for np.isclose is: absolute(out0 - out1) <= (per_out_atol + per_out_rtol * absolute(out1))
                    # So, given either tolerance, we can solve for the minimum required value of the other:
                    # required_atol (rtol = 0)   = max(absolute(out0 - out1))
                    # required_atol (given rtol) = max(absolute(out0 - out1) - per_out_rtol * absolute(out1))
                    # required_rtol (given atol) = max((absolute(out0 - out1) - per_out_atol) / absolute(out1))
                    if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                        absdiff = np.logical_xor(out0, out1)
                    else:
                        absdiff = np.abs(out0 - out1)
                    absout1 = np.abs(out1)
                    max_absdiff = max(compute_max(absdiff), 0.0)
                    required_atol_if_rtol = max(compute_max(absdiff - per_out_rtol * absout1), 0.0)
                    # Suppress divide by 0 warnings
                    with np.testing.suppress_warnings() as sup:
                        sup.filter(RuntimeWarning)
                        reldiff = np.maximum(absdiff - per_out_atol, 0.0) / absout1
                        max_reldiff = max(compute_max(reldiff), 0.0)
                    return max_absdiff, required_atol_if_rtol, max_reldiff, compute_mean(absdiff), compute_mean(reldiff)


                def log_mismatches(mismatches):
                    try:
                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception:
                        G_LOGGER.warning("Failed to log mismatches - this may be because the outputs have different shapes")


                try:
                    mismatches = np.logical_not(np.isclose(out0, out1, rtol=per_out_rtol, atol=per_out_atol))
                except Exception as err:
                    G_LOGGER.warning("Failed to compare outputs with:\n{:}\nSkipping".format(err))
                    return False

                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, misc.indent_block(out0)))
                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, misc.indent_block(out1)))

                failed = np.any(mismatches)

                try:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = compute_required()
                except Exception as err:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = None, None, None, None, None
                    G_LOGGER.warning("Could not determine required tolerances due to an error:\n{:}".format(err))
                    log_msg = ""
                else:
                    log_msg = "Required tolerances: [atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] | Mean Error: Absolute={:.5g}, Relative={:.5g}\n".format(
                                    max_absdiff, per_out_rtol, required_atol_if_rtol, max_reldiff, per_out_atol, mean_absdiff, mean_reldiff)

                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result0.runner_name, compute_mean(out0), compute_min(out0), compute_argmin(out0), compute_max(out0), compute_argmax(out0))
                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result1.runner_name, compute_mean(out1), compute_min(out1), compute_argmin(out1), compute_max(out1), compute_argmax(out1))
                G_LOGGER.info(log_msg)

                if failed:
                    log_mismatches(mismatches)
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff)


            output_status = OrderedDict() # OrderedDict[str, bool] Maps output names to whether they matched.

            if not check_shapes:
                G_LOGGER.info("Strict shape checking disabled. Will attempt to match output shapes before comparisons")


            def default_find_output_func(output_name, index, iter_result):
                found_name = misc.find_in_dict(output_name, iter_result, index)
                if found_name is None:
                    return None
                elif found_name != output_name:
                    exact_match = misc.find_in_dict(found_name, iter_result0)
                    if exact_match == found_name:
                        G_LOGGER.verbose("Will not compare {:} with {:}, since the former already has an exact match: {:}".format(
                                            found_name, output_name, exact_match))
                        return None # If the found output is being compared against another output already, skip this non-exact match
                    G_LOGGER.warning("Output names did not match exactly. Assuming {:} output: {:} "
                                    "corresponds to output: {:}".format(
                                        iter_result.runner_name, found_name, output_name))
                return [found_name]


            nonlocal find_output_func
            find_output_func = misc.default_value(find_output_func, default_find_output_func)

            for index, (out0_name, output0) in enumerate(iter_result0.items()):
                out1_names = misc.default_value(find_output_func(out0_name, index, iter_result1), [])

                if len(out1_names) > 1:
                    G_LOGGER.info("Will attempt to compare output: '{:}' [{:}] with multiple outputs: '{:}' [{:}]".format(
                                    out0_name, iter_result0.runner_name, list(out1_names), iter_result1.runner_name))

                for out1_name in out1_names:
                    if out1_name is None or out1_name not in iter_result1:
                        G_LOGGER.warning("For output: '{:}' [{:}], skipping corresponding output: '{:}' [{:}], "
                                         "since the output was not found".format(out0_name, iter_result0.runner_name,
                                                                                 out1_name, iter_result1.runner_name))
                        continue

                    output1 = iter_result1[out1_name]
                    G_LOGGER.start("Comparing Output: '{:}' (dtype={:}, shape={:}) with '{:}' (dtype={:}, shape={:})".format(
                                        out0_name, output0.dtype, output0.shape, out1_name, output1.dtype, output1.shape))
                    G_LOGGER.extra_verbose("Note: Comparing {:} vs. {:}".format(iter_result0.runner_name, iter_result1.runner_name))


                    def get_tol(tol_dict):
                        if isinstance(tol_dict, numbers.Number):
                            return tol_dict

                        if out0_name in tol_dict:
                            return tol_dict[out0_name]
                        elif "" in tol_dict:
                            return tol_dict[""]

                        G_LOGGER.critical("Could not find a tolerance for output: '{:}' in the provided tolerance map: {:}.\n"
                                          "Note: Use a key of `""` in the map to specify a default tolerance.".format(out0_name, tol_dict))


                    with G_LOGGER.indent():
                        if check_shapes and output0.shape != output1.shape:
                            G_LOGGER.error("Will not compare outputs of different shapes. Note: Output shapes are "
                                           "{:} and {:}.".format(output0.shape, output1.shape))
                            G_LOGGER.error("Note: Use --no-strict-shape-checking or set check_shapes=False to "
                                           "attempt to compare values anyway.", mode=LogMode.ONCE)
                            outputs_match = False
                        else:
                            output1 = misc.try_match_shape(output1, output0.shape)
                            output0 = output0.reshape(output1.shape)
                            outputs_match = check_outputs_match(output0, out0_name, output1, out1_name,
                                                                per_out_rtol=get_tol(rtol), per_out_atol=get_tol(atol))

                        output_status[out0_name] = outputs_match
                        if fail_fast and not outputs_match:
                            return output_status


            mismatched_output_names = [name for name, matched in output_status.items() if not matched]
            if mismatched_output_names:
                G_LOGGER.error("FAILED | Mismatched outputs: {:}".format(mismatched_output_names))

            # This is useful for catching cases where Polygraphy does something wrong with the runner output buffers
            if not output_status and (bool(iter_result0.keys()) or bool(iter_result1.keys())):
                r0_name = iter_result0.runner_name
                r0_outs = list(iter_result0.keys())
                r1_name = iter_result1.runner_name
                r1_outs = list(iter_result1.keys())
                G_LOGGER.critical("All outputs were skipped, no common outputs found! Note:\n{:} outputs: "
                                  "{:}\n{:} outputs: {:}".format(r0_name, r0_outs, r1_name, r1_outs))

            return output_status
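
The tolerance math in compute_required follows directly from np.isclose's criterion, absolute(out0 - out1) <= atol + rtol * absolute(out1): fixing one tolerance lets you solve for the minimum value of the other. A small worked check with arbitrary values:

    import numpy as np

    out0 = np.array([1.00, 2.00, 3.00])
    out1 = np.array([1.01, 1.98, 3.05])
    rtol, atol = 1e-3, 1e-4

    absdiff = np.abs(out0 - out1)
    required_atol = absdiff.max()                                           # minimum atol if rtol were 0
    required_atol_if_rtol = (absdiff - rtol * np.abs(out1)).max()           # minimum atol keeping the given rtol
    required_rtol = (np.maximum(absdiff - atol, 0.0) / np.abs(out1)).max()  # minimum rtol keeping the given atol

    # With rtol disabled, the derived atol is exactly the largest absolute error.
    assert np.all(np.isclose(out0, out1, rtol=0.0, atol=required_atol))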
Example #17
    def run(self, args):
        if not self.arg_groups[OnnxSaveArgs].path and not args.min_good:
            G_LOGGER.critical(
                "--output (where to write the reduced model) and/or "
                "--min-good (where to write a reduced model that passes) must be provided!"
            )

        model = self.arg_groups[OnnxLoaderArgs].load_onnx()
        num_orig_nodes = len(model.graph.node)

        # When --model-input-shapes are set, we need to override the shapes in the model, and then run
        # shape inference to figure out the new shapes of intermediate tensors.
        user_input_metadata = self.arg_groups[ModelArgs].input_shapes
        if user_input_metadata:
            model = gs.export_onnx(
                tools_util.override_input_shapes(onnx_backend.gs_from_onnx(model), user_input_metadata)
            )
            if self.arg_groups[OnnxShapeInferenceArgs].do_shape_inference:
                model = onnx_backend.infer_shapes(model)

        # Lower Constant nodes into Constant tensors
        # If we don't do this, the outputs of Constant nodes may be incorrectly marked
        #   as variable inputs. Further, fallback shape inference does not apply to Constant nodes.
        GRAPH = onnx_util.lower_constant_nodes(onnx_backend.gs_from_onnx(model))

        _layerwise_outputs = None
        _layerwise_meta = None
        # Get metadata inferred by fallback shape inference. If fallback shape inference was
        # never run, then this function runs it.
        def layerwise(model, include_data=False):
            nonlocal _layerwise_outputs, _layerwise_meta
            if _layerwise_outputs is None or _layerwise_meta is None:
                G_LOGGER.info(
                    "Running inference with ONNX-Runtime to determine metadata for intermediate tensors.\n"
                    "This will cause intermediate models to have static shapes."
                )
                _layerwise_outputs, _layerwise_meta = self.arg_groups[OnnxShapeInferenceArgs].fallback_inference(model)
            return _layerwise_outputs if include_data else _layerwise_meta

        if self.arg_groups[OnnxShapeInferenceArgs].force_fallback:
            G_LOGGER.info("Freezing shapes in the model according to values determined by fallback shape inference")
            onnx_util.set_shapes_from_layerwise_meta(GRAPH, layerwise(model))

        def fix_graph(graph, model):
            """
            Fix the graph so it is valid ONNX.
            """

            def fix_tensor_metadata(tensors, fix_shape=True):
                for tensor in tensors:
                    if not tensor.shape and fix_shape:
                        tensor.shape = layerwise(model)[tensor.name].shape
                    if not tensor.dtype:
                        tensor.dtype = layerwise(model)[tensor.name].dtype

            fix_tensor_metadata(graph.inputs)
            fix_tensor_metadata(graph.outputs, fix_shape=False)

            # If we're marking inputs, there may be cases where some other inputs are required - for
            # example, if the model is branchy. If, after cleanup(), there are any Variable tensors in
            # the graph without inputs, we'll replace them with constants and fold them away.
            tensor_map = graph.tensors()
            needs_const_fold = False
            for tensor in tensor_map.values():
                if isinstance(tensor, gs.Variable) and not tensor.inputs and tensor not in graph.inputs:
                    needs_const_fold = True
                    G_LOGGER.info("Freezing model input: {:}".format(tensor))
                    tensor.to_constant(layerwise(model, include_data=True)[tensor.name])

            if needs_const_fold:
                G_LOGGER.info("Folding constants to remove extraneous subgraphs")
                graph.fold_constants().cleanup()

            return graph

        def mark_io(graph, attr, tensors, filter_const=True):
            if filter_const:
                tensors = [t for t in tensors if not isinstance(t, gs.Constant)]

            if not tensors:
                G_LOGGER.warning(
                    "No non-constant tensors are available to mark. "
                    "Try folding constants in the model with `polygraphy surgeon sanitize --fold-constants`"
                )

            setattr(graph, attr, tensors)
            G_LOGGER.info("Marking model {attr}: {:}".format(getattr(graph, attr), attr=attr))
            return graph

        def names_from_tensors(tensors):
            return [t.name for t in tensors]

        def lookup_tensors(graph, names):
            tensor_map = graph.tensors()
            return [tensor_map[name] for name in names]

        # Bisect using the given marker, and modifying the given graph attribute.
        # attr should be one of ["inputs", "outputs"].
        # filter_const indicates whether to filter out constant tensors before updating graph I/O.
        def bisect_io(graph, model, marker, attr, filter_const=True):
            G_LOGGER.start("Reducing model {:}".format(attr))
            iter_graph = graph

            while not marker.stop():
                G_LOGGER.start(
                    "RUNNING | Iteration {:} | Approximately {:} iteration(s) remaining".format(
                        marker.iteration + 1, marker.remaining()
                    )
                )
                iter_graph = graph.copy()  # This is a very light-weight copy of the entire graph.

                with G_LOGGER.indent():
                    io_list = list(getattr(iter_graph.nodes[marker.node_index], attr))
                    mark_io(iter_graph, attr, io_list, filter_const)
                    iter_graph.cleanup()
                    self.arg_groups[OnnxSaveArgs].save_onnx(
                        gs.export_onnx(fix_graph(iter_graph, model)), self.arg_groups[ArtifactSorterArgs].iter_artifact
                    )

                num_nodes = len(iter_graph.nodes)
                success = self.arg_groups[ArtifactSorterArgs].sort_artifacts(
                    marker.iteration + 1, suffix="_reduce_{:}_{:}_nodes".format(attr, num_nodes)
                )
                marker.step(success, num_nodes)

            marker.finish()
            G_LOGGER.finish("Finished reducing model {attr}".format(attr=attr))

            # Find minimal good/bad inputs/outputs, falling back to existing graph inputs/outputs.
            def get_io(index):
                if index is None:
                    return names_from_tensors(getattr(graph, attr))
                return names_from_tensors(list(getattr(graph.nodes[index], attr)))

            return get_io(marker.best_bad_node_index), get_io(marker.best_good_node_index)

        # We reduce the model in 2 phases:
        #   1. Find the earliest output nodes that cause a failure.
        #   2. Find the latest input nodes that cause a failure.

        MarkerType = BisectMarker if args.mode == "bisect" else LinearMarker

        bad_graph = GRAPH.copy()

        good_graph = None
        if args.min_good:
            good_graph = GRAPH.copy()

        # == Phase 1 ==

        if args.reduce_outputs:
            out_marker = MarkerType(len(bad_graph.nodes))
            bad_outputs, good_outputs = bisect_io(bad_graph, model, out_marker, attr="outputs", filter_const=False)
            bad_graph = mark_io(bad_graph, "outputs", lookup_tensors(bad_graph, bad_outputs)).cleanup()
            if good_graph is not None:
                good_graph = mark_io(
                    good_graph, "outputs", lookup_tensors(good_graph, good_outputs)
                )  # Defer cleanup where possible.
            # Export the model with the reduced outputs so that reducing inputs is faster.
            model = gs.export_onnx(fix_graph(bad_graph, model))

        # == Phase 2 ==

        if args.reduce_inputs:
            in_marker = MarkerType(len(bad_graph.nodes), invert=True)
            bad_inputs, good_inputs = bisect_io(bad_graph, model, in_marker, attr="inputs")
            bad_graph = mark_io(bad_graph, "inputs", lookup_tensors(bad_graph, bad_inputs)).cleanup()
            if good_graph is not None:
                good_graph = mark_io(
                    good_graph, "inputs", lookup_tensors(good_graph, good_inputs)
                )  # Defer cleanup where possible.

        # == Write Bad Model ==

        reduced_model = gs.export_onnx(fix_graph(bad_graph, model))

        if self.arg_groups[OnnxSaveArgs].path:
            num_reduced_nodes = len(reduced_model.graph.node)

            if (
                float(num_reduced_nodes) / float(num_orig_nodes) >= 0.25
                and num_reduced_nodes > 1
                and args.mode == "bisect"
            ):
                G_LOGGER.warning(
                    "It looks like this model could potentially be reduced further.\n"
                    "You may want to reduce {:} again using --mode=linear. ".format(self.arg_groups[OnnxSaveArgs].path)
                )

            G_LOGGER.info("Minimum Bad Model:\n{:}\n\n".format(onnx_util.str_from_onnx(reduced_model, mode="none")))
            self.arg_groups[OnnxSaveArgs].save_onnx(reduced_model)

        # == Write Good Model ==

        if good_graph is not None:
            min_good_model = gs.export_onnx(fix_graph(good_graph.cleanup(), model))
            if min_good_model == reduced_model:
                G_LOGGER.warning(
                    "Could not find a minimal model close in size to the reduced model that does not cause a failure."
                )
            else:
                G_LOGGER.info(
                    "Minimum Good Model:\n{:}\n\n".format(onnx_util.str_from_onnx(min_good_model, mode="none"))
                )
                self.arg_groups[OnnxSaveArgs].save_onnx(min_good_model, args.min_good)
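
The two-phase reduction relies on the failure being monotone along the graph's topological order, which is what makes bisection valid in --mode=bisect. The core idea, stripped of Polygraphy's BisectMarker machinery, is an ordinary binary search over node indices (purely illustrative):

    def bisect_earliest_failure(num_nodes, check):
        # `check(i)` returns True when the failure still reproduces with the
        # model truncated at node i. Assuming check is monotone (False ... False
        # True ... True), this finds the earliest failing index in O(log n) checks.
        lo, hi = 0, num_nodes - 1
        while lo < hi:
            mid = (lo + hi) // 2
            if check(mid):
                hi = mid       # still fails; the culprit is here or earlier
            else:
                lo = mid + 1   # no longer fails; the culprit is later
        return lo

    # Toy model: the failure is introduced by node 13 of a 20-node chain.
    assert bisect_earliest_failure(20, lambda i: i >= 13) == 13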
Example #18
    def activate_impl(self):
        """
        Vars:
            engine (trt.ICudaEngine):
                    The engine tracked by this runner. The TrtLegacyRunner OWNS the engine it
                    manages, and therefore is responsible for its destruction. Do not free the engine outside of the
                    runner, or it will result in a double free.
            context (trt.IExecutionContext): The context used for inference.
            input_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for input buffers.
            output_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for output buffers.
            bindings (List[int]): A list of device pointers for engine bindings.
            stream (cuda.Stream): The CUDA stream that this runner will use for inference.
        """

        # Only initialize GPU after this runner is activated.
        # Allocates all buffers required for an engine, i.e. host/device input_buffers/output_buffers.
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceBuffer(shape=shape,
                                               dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:40} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream

        # Always try reading the engine first, or, failing that, build it.
        if self.load_engine:
            with open(self.load_engine,
                      "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
                G_LOGGER.info("Reading engine from {:}".format(
                    self.load_engine))
                self.engine = runtime.deserialize_cuda_engine(f.read())
        else:
            trt.init_libnvinfer_plugins(TRT_LOGGER, "")
            builder, network, parser, model_batch_size = self.network_loader()
            with builder, network, parser:
                builder.max_batch_size = int(self.max_batch_size
                                             or model_batch_size or 1)

                config = builder.create_builder_config()
                config.max_workspace_size = int(self.max_workspace_size)

                if not self.tf32:
                    with contextlib.suppress(AttributeError):
                        config.clear_flag(trt.BuilderFlag.TF32)
                if self.fp16:
                    config.set_flag(trt.BuilderFlag.FP16)

                if not network:
                    G_LOGGER.critical("Invalid network")
                G_LOGGER.super_verbose(lambda: trt_util.str_from_network(
                    network) or "Finished logging network")

                if self.layerwise:
                    # In layerwise mode, every layer becomes an output.
                    G_LOGGER.info(
                        "Running in layerwise mode. Marking {:} layers as outputs"
                        .format(network.num_layers))
                    for layer in network:
                        for index in range(layer.num_outputs):
                            out = layer.get_output(index)
                            if not out.is_network_output:
                                network.mark_output(out)

                G_LOGGER.info(
                    "Building engine: max workspace size={:} bytes, max batch size={:}, fp16={:}, "
                    "tf32={:}".format(self.max_workspace_size,
                                      builder.max_batch_size, self.fp16,
                                      self.tf32))
                self.engine = builder.build_engine(network, config)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if self.engine_path:
            with open(self.engine_path, "wb") as f:
                G_LOGGER.info("Writing engine to {:}".format(self.engine_path))
                f.write(self.engine.serialize())

        self.context = self.engine.create_execution_context()
        self.input_buffers, self.output_buffers, self.stream = allocate_buffers(
            self.engine)
Example #19
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("Runner: {:40} | Input Metadata: {:}".format(
                    active_runner.name, input_metadata),
                              mode=LogMode.ONCE)
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                index = 0
                iteration_results = []
                output_metadata = TensorMetadata()

                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if index == 0:
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.info(
                        "Runner: {:40} | Output Metadata: {:}".format(
                            active_runner.name, output_metadata),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.finish(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, index + 1))
                return iteration_results
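
execute_runner is driven by Comparator.run, which activates each runner in turn and collects the per-iteration results. A hedged usage sketch (loader names follow recent Polygraphy releases and may differ across versions; "model.onnx" is a placeholder):

    from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner
    from polygraphy.comparator import Comparator

    build_engine = EngineFromNetwork(NetworkFromOnnxPath("model.onnx"))
    run_results = Comparator.run([TrtRunner(build_engine)], warm_up=2)  # 2 untimed warm-up runs per runner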
Example #20
    def validate(run_results,
                 check_finite=None,
                 check_nan=None,
                 fail_fast=None):
        """
        Checks output validity.

        Args:
            run_results (RunResults): The result of Comparator.run().
            check_finite (bool): Whether to fail on non-finite values. Defaults to False.
            check_nan (bool): Whether to fail on NaNs. Defaults to True.
            fail_fast (bool): Whether to fail after the first invalid value. Defaults to False.

        Returns:
            bool: True if all outputs were valid, False otherwise.
        """
        check_finite = misc.default_value(check_finite, False)
        check_nan = misc.default_value(check_nan, True)
        fail_fast = misc.default_value(fail_fast, False)

        def is_finite(output):
            non_finite = np.logical_not(np.isfinite(output))
            if np.any(non_finite):
                G_LOGGER.error("Encountered one or more non-finite values")
                G_LOGGER.error(
                    "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values",
                    mode=LogMode.ONCE)
                G_LOGGER.extra_verbose(
                    "Note: non-finite values at:\n{:}".format(non_finite))
                G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(
                    output[non_finite]))
                return False
            return True

        def is_not_nan(output):
            nans = np.isnan(output)
            if np.any(nans):
                G_LOGGER.error("Encountered one or more NaNs")
                G_LOGGER.error(
                    "Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs",
                    mode=LogMode.ONCE)
                G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nans))
                return False
            return True

        all_valid = True
        for runner_name, results in run_results:
            for result in results:
                for output_name, output in result.items():
                    G_LOGGER.info(
                        "Runner: {:40} | Validating output: {:} (check_finite={:}, check_nan={:})"
                        .format(runner_name, output_name, check_finite,
                                check_nan))

                    output_valid = True
                    with G_LOGGER.indent():
                        if check_nan:
                            output_valid &= is_not_nan(output)
                        if check_finite:
                            output_valid &= is_finite(output)

                        all_valid &= output_valid

                        if output_valid:
                            G_LOGGER.finish(
                                "Runner: {:40} | Output: {:} is valid".format(
                                    runner_name, output_name))
                        else:
                            G_LOGGER.error(
                                "Runner: {:40} | Errors detected in output: {:}"
                                .format(runner_name, output_name))
                            if fail_fast:
                                return False

        if all_valid:
            G_LOGGER.finish("Validation passed")
        else:
            G_LOGGER.error("Validation failed")
        return all_valid
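
A short usage sketch, assuming validate is a static method on Polygraphy's Comparator and that run_results comes from Comparator.run as in the previous example:

    from polygraphy.comparator import Comparator

    if not Comparator.validate(run_results, check_finite=True, check_nan=True, fail_fast=False):
        raise RuntimeError("One or more runner outputs contained NaNs or non-finite values")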
Example #21
    def compare_accuracy(run_results,
                         fail_fast=False,
                         comparisons=None,
                         compare_func=None):
        """
        Args:
            run_results (RunResults): The result of Comparator.run()
            fail_fast (bool): Whether to exit after the first failure
            comparisons (List[Tuple[int, int]]):
                    Comparisons to perform, specified by runner indexes. For example, [(0, 1), (1, 2)]
                    would compare the first runner with the second, and the second with the third.
                    By default, this compares each result to the subsequent one.
            compare_func (Callable(IterationResult, IterationResult) -> OrderedDict[str, bool]):
                    A function that takes in two IterationResults, and returns a dictionary that maps output
                    names to a boolean (or anything convertible to a boolean) indicating whether outputs matched.
                    The order of arguments to this function is guaranteed to be the same as the ordering of the
                    tuples contained in `comparisons`.

        Returns:
            AccuracyResult:
                    A summary of the results of the comparisons. The order of the keys (i.e. runner pairs) is
                    guaranteed to be the same as the order of `comparisons`. For more details, see the AccuracyResult
                    docstring (e.g. help(AccuracyResult)).
        """
        def find_mismatched(match_dict):
            return [
                name for name, matched in match_dict.items()
                if not bool(matched)
            ]

        compare_func = misc.default_value(compare_func,
                                          CompareFunc.basic_compare_func())
        comparisons = misc.default_value(
            comparisons, Comparator.default_comparisons(run_results))

        accuracy_result = AccuracyResult()
        for runner0_index, runner1_index in comparisons:
            runner0_name, results0 = run_results[runner0_index]
            runner1_name, results1 = run_results[runner1_index]

            G_LOGGER.start("Accuracy Comparison | {:} vs. {:}".format(
                runner0_name, runner1_name))
            with G_LOGGER.indent():
                runner_pair = (runner0_name, runner1_name)
                accuracy_result[runner_pair] = []

                num_iters = min(len(results0), len(results1))
                for iteration, (result0,
                                result1) in enumerate(zip(results0, results1)):
                    if num_iters > 1:
                        G_LOGGER.info("Iteration: {:}".format(iteration))
                    with contextlib.ExitStack() as stack:
                        if num_iters > 1:
                            stack.enter_context(G_LOGGER.indent())
                        iteration_match_dict = compare_func(result0, result1)
                        accuracy_result[runner_pair].append(
                            iteration_match_dict)

                    mismatched_outputs = find_mismatched(iteration_match_dict)
                    if fail_fast and mismatched_outputs:
                        return accuracy_result

                G_LOGGER.extra_verbose(
                    "Finished comparing {:} with {:}".format(
                        runner0_name,
                        runner1_name,
                    ))

                passed, failed, total = accuracy_result.stats(runner_pair)
                pass_rate = accuracy_result.percentage(runner_pair) * 100.0
                if num_iters > 1 or len(comparisons) > 1:
                    msg = "Accuracy Summary | {:} vs. {:} | Passed: {:}/{:} iterations | Pass Rate: {:}%".format(
                        runner0_name, runner1_name, passed, total, pass_rate)
                    if passed == total:
                        G_LOGGER.finish(msg)
                    else:
                        G_LOGGER.error(msg)
        return accuracy_result
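A hedged usage sketch for compare_accuracy. The import path and the rtol/atol parameters of CompareFunc.basic_compare_func are assumptions (the function itself appears in Example #22 below); the comparisons argument follows the docstring above:

    from polygraphy.comparator import Comparator, CompareFunc  # assumed import path

    # Compare runner 0 against runner 1 with assumed per-output tolerances.
    accuracy_result = Comparator.compare_accuracy(
        run_results,
        comparisons=[(0, 1)],
        compare_func=CompareFunc.basic_compare_func(rtol=1e-3, atol=1e-3))

    # Per the docstring, AccuracyResult maps each runner pair to one match
    # dictionary per iteration; each value is convertible to bool.
    for (name0, name1), iteration_matches in accuracy_result.items():
        all_passed = all(bool(matched)
                         for match_dict in iteration_matches
                         for matched in match_dict.values())
        print("{:} vs. {:} | Passed: {:}".format(name0, name1, all_passed))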
Example #22
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol):
                def compute_max(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amax(buffer)

                # Returns index of max value
                def compute_argmax(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmax(buffer), buffer.shape)

                def compute_min(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amin(buffer)

                # Returns index of min value
                def compute_argmin(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmin(buffer), buffer.shape)

                def compute_mean(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.mean(buffer)

                def compute_required():
                    # The purpose of this function is to determine the minimum tolerances such that
                    # the outputs would be considered a match.
                    # The NumPy formula for np.isclose is absolute(out0 - out1) <= (per_out_atol + per_out_rtol * absolute(out1))
                    # So, for both absolute/relative tolerance, given either one,
                    # we can compute the required value for the other:
                    # required_atol = absolute(out0 - out1)
                    # required_atol_if_rtol = absolute(out0 - out1) - per_out_rtol * absolute(out1)
                    # required_rtol = (absolute(out0 - out1) - per_out_atol) / absolute(out1)
                    if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                        absdiff = np.logical_xor(out0, out1)
                    else:
                        absdiff = np.abs(out0 - out1)
                    absout1 = np.abs(out1)
                    max_absdiff = max(compute_max(absdiff), 0.0)
                    required_atol_if_rtol = max(compute_max(absdiff - per_out_rtol * absout1), 0.0)
                    # Suppress divide by 0 warnings
                    with np.testing.suppress_warnings() as sup:
                        sup.filter(RuntimeWarning)
                        reldiff = np.maximum(absdiff - per_out_atol, 0.0) / absout1
                        max_reldiff = max(compute_max(reldiff), 0.0)
                    return max_absdiff, required_atol_if_rtol, max_reldiff, compute_mean(absdiff), compute_mean(reldiff)

                def log_mismatches(mismatches):
                    try:
                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception:
                        G_LOGGER.warning("Failed to log mismatches - this may be because the outputs have different shapes")

                try:
                    mismatches = np.logical_not(np.isclose(out0, out1, rtol=per_out_rtol, atol=per_out_atol))
                except Exception as err:
                    G_LOGGER.warning("Failed to compare outputs with:\n{:}\nSkipping".format(err))
                    return False

                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, misc.indent_block(out0)))
                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, misc.indent_block(out1)))

                failed = np.any(mismatches)

                try:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = compute_required()
                except Exception as err:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = None, None, None, None, None
                    G_LOGGER.warning("Could not determine required tolerances due to an error:\n{:}".format(err))
                    log_msg = ""
                else:
                    log_msg = "Required tolerances: [atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] | Mean Error: Absolute={:.5g}, Relative={:.5g}\n".format(
                                    max_absdiff, per_out_rtol, required_atol_if_rtol, max_reldiff, per_out_atol, mean_absdiff, mean_reldiff)

                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result0.runner_name, compute_mean(out0), compute_min(out0), compute_argmin(out0), compute_max(out0), compute_argmax(out0))
                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result1.runner_name, compute_mean(out1), compute_min(out1), compute_argmin(out1), compute_max(out1), compute_argmax(out1))
                G_LOGGER.info(log_msg)

                if failed:
                    log_mismatches(mismatches)
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff)
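The compute_required logic above inverts the np.isclose criterion, absolute(out0 - out1) <= atol + rtol * absolute(out1), to report the smallest tolerances that would have passed. A standalone sketch with hypothetical values, independent of the Polygraphy API:

    import numpy as np

    out0 = np.array([1.00, 2.00, 3.00])  # hypothetical outputs from two runners
    out1 = np.array([1.01, 2.00, 2.90])
    per_out_rtol, per_out_atol = 1e-5, 1e-5

    absdiff = np.abs(out0 - out1)
    absout1 = np.abs(out1)

    # Smallest atol that passes on its own: the largest absolute difference (~0.1 here).
    required_atol = np.amax(absdiff)

    # Smallest atol that passes given the current rtol.
    required_atol_if_rtol = np.amax(absdiff - per_out_rtol * absout1)

    # Smallest rtol that passes given the current atol (guarding against division by zero).
    with np.errstate(divide="ignore", invalid="ignore"):
        required_rtol = np.amax(np.maximum(absdiff - per_out_atol, 0.0) / absout1)

    assert np.allclose(out0, out1, rtol=0.0, atol=required_atol)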
Example #23
    def run(runners,
            data_loader=None,
            warm_up=None,
            use_subprocess=None,
            subprocess_timeout=None,
            subprocess_polling_interval=None):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (Sequence[BaseRunner]):
                    The runners to run sequentially.
            data_loader (Generator -> OrderedDict[str, np.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, np.ndarray]]` .

                    If you do not know the details of the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this will not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
                    Defaults to False.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, omitting this parameter when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.

        Returns:
            RunResults: A mapping of runner names to the results of their inference. The ordering of `runners` is preserved in this mapping.
        """
        warm_up = misc.default_value(warm_up, 0)
        data_loader = misc.default_value(data_loader, DataLoader())
        use_subprocess = misc.default_value(use_subprocess, False)
        subprocess_polling_interval = misc.default_value(
            subprocess_polling_interval, 30)
        loader_cache = DataLoaderCache(data_loader)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.verbose("Runner: {:40} | Input Metadata:\n{:}".format(
                    active_runner.name, misc.indent_block(input_metadata)))
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.info(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                total_time = 0
                run_results = []
                for feed_dict in loader_cache:
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    run_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if len(run_results) == 1:
                        output_metadata = TensorMetadata()
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.verbose(
                        "Runner: {:40} | Output Metadata:\n{:}".format(
                            active_runner.name,
                            misc.indent_block(output_metadata)),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.info(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, len(run_results)))
                return run_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            run_results = None
            try:
                run_results = execute_runner(runner, loader_cache)
            except:
                # Cannot send the exception back, as it is not necessarily pickleable
                import traceback
                G_LOGGER.error(traceback.format_exc())
            misc.try_send_on_queue(runner_queue, run_results)
            # After finishing, send the updated loader_cache back.
            misc.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        runner_queue = Queue()
        run_results = RunResults()
        for index, runner in enumerate(runners):
            G_LOGGER.info(
                "Runner: {:40} | Activating and starting inference".format(
                    runner.name))
            if use_subprocess:
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                run_results[runner.name] = None
                while process.is_alive() and run_results[runner.name] is None:
                    try:
                        run_results[runner.name] = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert run_results[runner.name] is not None
                    process.join(subprocess_timeout)
                except Exception:
                    G_LOGGER.critical(
                        "Runner: {:40} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results[runner.name] = execute_runner(runner, loader_cache)

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
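Per the docstring, the simplest data loader is a list of feed dictionaries. A short sketch; the input name and shape are hypothetical, and `runners` is assumed to be constructed elsewhere:

    import numpy as np

    data = [{"input0": np.random.rand(1, 3, 224, 224).astype(np.float32)}
            for _ in range(5)]

    # Runs 5 timed iterations per runner, each preceded by 2 untimed warm-up runs.
    run_results = Comparator.run(runners, data_loader=data, warm_up=2)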
Example #24
    def activate_impl(self):
        self.model, _ = misc.try_call(self._model)

        import onnx_tf
        G_LOGGER.info("Preparing ONNX-TF backend")
        self.tf_rep = onnx_tf.backend.prepare(self.model)
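For context, a hedged sketch of how the prepared representation might be used for inference; onnx.load and tf_rep.run reflect typical onnx/onnx-tf usage, but the model path and input shape are hypothetical:

    import numpy as np
    import onnx
    import onnx_tf

    model = onnx.load("model.onnx")                       # hypothetical path
    tf_rep = onnx_tf.backend.prepare(model)               # same call as activate_impl above
    input_array = np.zeros((1, 3, 224, 224), np.float32)  # hypothetical input shape
    outputs = tf_rep.run(input_array)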
Example #25
def load_plugins(plugins):
    for plugin in plugins:
        G_LOGGER.info("Loading plugin library: {:}".format(plugin))
        ctypes.CDLL(plugin)
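Loading a library with ctypes.CDLL is enough for plugins that register themselves on load (as TensorRT plugin libraries typically do). A usage sketch with a hypothetical path:

    # Hypothetical plugin library; it must be built against the TensorRT version in use.
    load_plugins(plugins=["./libcustom_plugins.so"])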