Code Example #1
    def __call__(self):
        """
        Builds a TensorRT engine.

        Returns:
            trt.ICudaEngine: The engine that was created.
        """
        # If network is a callable, then we own its return value
        ret, owning = misc.try_call(self._network)
        builder, network, parser = misc.unpack_args(ret, num=3)

        with contextlib.ExitStack() as stack:
            provided = "Builder and Network" if parser is None else "Builder, Network, and Parser"
            if owning:
                stack.enter_context(builder)
                stack.enter_context(network)
                if parser is not None:
                    stack.enter_context(parser)
            else:
                G_LOGGER.verbose("{:} were provided directly instead of via a Callable. This loader will not assume ownership. "
                               "Please ensure that they are freed.".format(provided))

            network_log_mode = "full" if G_LOGGER.severity <= G_LOGGER.ULTRA_VERBOSE else "attrs"
            G_LOGGER.super_verbose(lambda: ("Displaying TensorRT Network:\n" + trt_util.str_from_network(network, mode=network_log_mode)))

            config, _ = misc.try_call(self._config, builder, network)
            G_LOGGER.info("Building engine with configuration: {:}".format(trt_util.str_from_config(config)))
            engine = builder.build_engine(network, config)
            if not engine:
                G_LOGGER.critical("Invalid Engine. Please ensure the engine was built correctly")

            if hasattr(config.int8_calibrator, "free"):
                config.int8_calibrator.free()

            return engine
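
The loader frees the builder, network, and parser only when it created them itself (i.e. when self._network was a callable). A minimal sketch of that conditional-ownership pattern with contextlib.ExitStack, using a hypothetical Resource class in place of the TensorRT objects:

import contextlib

class Resource:
    """Hypothetical context manager standing in for a builder/network/parser."""
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("Freeing", self.name)

def use(resource_or_factory):
    # If given a factory, we create the resource and therefore own it.
    owning = callable(resource_or_factory)
    resource = resource_or_factory() if owning else resource_or_factory

    with contextlib.ExitStack() as stack:
        if owning:
            # Register the resource so ExitStack frees it on exit.
            stack.enter_context(resource)
        print("Using", resource.name)
    # A directly-provided resource is still alive here; the caller must free it.

use(lambda: Resource("network"))   # owned by use(): freed automatically
use(Resource("network"))           # provided directly: not freed by use()
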
Code Example #2
File: compare.py Project: celidos/TensorRT_study
 def log_mismatches(mismatches):
     try:
         with G_LOGGER.indent():
             G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
             G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
             G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
     except Exception:
         G_LOGGER.warning("Failed to log mismatches - this may be because the outputs are of different shapes")
Code Example #3
File: trt_legacy.py Project: leo-XUKANG/TensorRT-1
    def activate_impl(self):
        """
        Vars:
            engine (trt.ICudaEngine):
                    The engine tracked by this runner. The TrtLegacyRunner OWNS the engine it
                    manages, and therefore is responsible for its destruction. Do not free the engine outside of the
                    runner, or it will result in a double free.
            context (trt.IExecutionContext): The context used for inference.
            input_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for input buffers.
            output_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for output buffers.
            bindings (List[int]): A list of device pointers for engine bindings.
            stream (cuda.Stream): The CUDA stream that this runner will use for inference.
        """

        # Only initialize GPU after this runner is activated.
        # Allocates all buffers required for an engine, i.e. host/device input_buffers/output_buffers.
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceBuffer(shape=shape,
                                               dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:40} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream

        # Always try reading the engine first, or, failing that, build it.
        if self.load_engine:
            with open(self.load_engine,
                      "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
                G_LOGGER.info("Reading engine from {:}".format(
                    self.load_engine))
                self.engine = runtime.deserialize_cuda_engine(f.read())
        else:
            trt.init_libnvinfer_plugins(TRT_LOGGER, "")
            builder, network, parser, model_batch_size = self.network_loader()
            with builder, network, parser:
                builder.max_batch_size = int(self.max_batch_size
                                             or model_batch_size or 1)

                config = builder.create_builder_config()
                config.max_workspace_size = int(self.max_workspace_size)

                if not self.tf32:
                    with contextlib.suppress(AttributeError):
                        config.clear_flag(trt.BuilderFlag.TF32)
                if self.fp16:
                    config.flags = 1 << int(trt.BuilderFlag.FP16)

                if not network:
                    G_LOGGER.critical("Invalid network")
                G_LOGGER.super_verbose(lambda: trt_util.str_from_network(
                    network) or "Finished logging network")

                if self.layerwise:
                    # In layerwise mode, every layer becomes an output.
                    G_LOGGER.info(
                        "Running in layerwise mode. Marking {:} layers as outputs"
                        .format(network.num_layers))
                    for layer in network:
                        for index in range(layer.num_outputs):
                            out = layer.get_output(index)
                            if not out.is_network_output:
                                network.mark_output(out)

                G_LOGGER.info(
                    "Building engine: max workspace size={:} bytes, max batch size={:}, fp16={:}, "
                    "tf32={:}".format(builder.max_workspace_size,
                                      builder.max_batch_size, self.fp16,
                                      self.tf32))
                self.engine = builder.build_engine(network, config)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if self.engine_path:
            with open(self.engine_path, "wb") as f:
                G_LOGGER.info("Writing engine to {:}".format(self.engine_path))
                f.write(self.engine.serialize())

        self.context = self.engine.create_execution_context()
        self.input_buffers, self.output_buffers, self.stream = allocate_buffers(
            self.engine)
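
allocate_buffers prepends the engine's max batch size to every binding shape (the legacy implicit-batch convention) and pairs each host buffer with a device buffer of the same shape and dtype. A rough NumPy-only sketch of the shape computation and the host-side allocation, using made-up binding metadata in place of a real engine:

from collections import OrderedDict

import numpy as np

max_batch_size = 4
# Hypothetical per-binding shapes and dtypes, as an implicit-batch engine would report them
binding_info = OrderedDict([
    ("input",  ((3, 224, 224), np.float32)),
    ("output", ((1000,),       np.float32)),
])

host_buffers = OrderedDict()
for name, (shape, dtype) in binding_info.items():
    # Implicit-batch engines report shapes without the batch dimension,
    # so the batch size is prepended before allocating.
    full_shape = (max_batch_size,) + shape
    host_buffers[name] = np.empty(shape=full_shape, dtype=dtype)
    print(name, host_buffers[name].shape)   # e.g. input (4, 3, 224, 224)
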
Code Example #4
File: compare.py Project: celidos/TensorRT_study
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol):
                def compute_max(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amax(buffer)

                # Returns index of max value
                def compute_argmax(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmax(buffer), buffer.shape)

                def compute_min(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.amin(buffer)

                # Returns index of min value
                def compute_argmin(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.unravel_index(np.argmin(buffer), buffer.shape)

                def compute_mean(buffer):
                    if misc.is_empty_shape(buffer.shape):
                        return 0
                    return np.mean(buffer)


                def compute_required():
                    # The purpose of this function is to determine the minimum tolerances such that
                    # the outputs would be considered a match.
                    # The NumPy formula for np.isclose is absolute(out0 - out1) <= (per_out_atol + per_out_rtol * absolute(out1))
                    # So, for both absolute/relative tolerance, given either one,
                    # we can compute the required value for the other:
                    # per_out_atol = absolute(out0 - out1)
                    # atol_if_rtol = absolute(out0 - out1)  - per_out_rtol * absolute(out1)
                    # per_out_rtol = (absolute(out0 - out1) - per_out_atol) / absolute(out1)
                    if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                        absdiff = np.logical_xor(out0, out1)
                    else:
                        absdiff = np.abs(out0 - out1)
                    absout1 = np.abs(out1)
                    max_absdiff = max(compute_max(absdiff), 0.0)
                    required_atol_if_rtol = max(compute_max(absdiff - per_out_rtol * absout1), 0.0)
                    # Suppress divide by 0 warnings
                    with np.testing.suppress_warnings() as sup:
                        sup.filter(RuntimeWarning)
                        reldiff = np.maximum(absdiff - per_out_atol, 0.0) / absout1
                        max_reldiff = max(compute_max(reldiff), 0.0)
                    return max_absdiff, required_atol_if_rtol, max_reldiff, compute_mean(absdiff), compute_mean(reldiff)


                def log_mismatches(mismatches):
                    try:
                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("Runner: {:40} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception:
                        G_LOGGER.warning("Failed to log mismatches - this may be because the outputs are of different shapes")


                try:
                    mismatches = np.logical_not(np.isclose(out0, out1, rtol=per_out_rtol, atol=per_out_atol))
                except Exception as err:
                    G_LOGGER.warning("Failed to compare outputs with:\n{:}\nSkipping".format(err))
                    return False

                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, misc.indent_block(out0)))
                G_LOGGER.super_verbose("Runner: {:40} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, misc.indent_block(out1)))

                failed = np.any(mismatches)

                try:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = compute_required()
                except Exception as err:
                    max_absdiff, required_atol_if_rtol, max_reldiff, mean_absdiff, mean_reldiff = None, None, None, None, None
                    G_LOGGER.warning("Could not determine required tolerances due to an error:\n{:}".format(err))
                    log_msg = ""
                else:
                    log_msg = "Required tolerances: [atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] OR [rtol={:.5g}, atol={:.5g}] | Mean Error: Absolute={:.5g}, Relative={:.5g}\n".format(
                                    max_absdiff, per_out_rtol, required_atol_if_rtol, max_reldiff, per_out_atol, mean_absdiff, mean_reldiff)

                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result0.runner_name, compute_mean(out0), compute_min(out0), compute_argmin(out0), compute_max(out0), compute_argmax(out0))
                log_msg += "Runner: {:40} | Stats: mean={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format(
                                iter_result1.runner_name, compute_mean(out1), compute_min(out1), compute_argmin(out1), compute_max(out1), compute_argmax(out1))
                G_LOGGER.info(log_msg)

                if failed:
                    log_mismatches(mismatches)
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rtol={:}, atol={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff)
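
compute_required inverts the np.isclose criterion, absolute(out0 - out1) <= atol + rtol * absolute(out1), to report the smallest tolerances that would have made the comparison pass. A short sketch of that inversion on dummy arrays (the variable names are chosen for the example):

import numpy as np

out0 = np.array([1.0, 2.0, 3.0])
out1 = np.array([1.0, 2.1, 2.7])
rtol, atol = 1e-3, 1e-4

absdiff = np.abs(out0 - out1)
absout1 = np.abs(out1)

# Smallest atol that passes on its own (rtol = 0)
required_atol = np.amax(absdiff)
# Smallest atol that passes in combination with the given rtol
required_atol_if_rtol = max(np.amax(absdiff - rtol * absout1), 0.0)
# Smallest rtol that passes in combination with the given atol
required_rtol = np.amax(np.maximum(absdiff - atol, 0.0) / absout1)

print(required_atol, required_atol_if_rtol, required_rtol)
# Sanity check: the relaxed absolute tolerance alone makes the comparison pass
assert np.all(np.isclose(out0, out1, rtol=0.0, atol=required_atol))
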