Example #1
    def call_impl(self):
        uff_model, input_names, input_shapes, output_names = self.uff_loader()

        builder = trt.Builder(get_trt_logger())
        network = builder.create_network()
        parser = trt.UffParser()
        # Input names should come from the converter, as a preprocessing script may have been applied to the frozen model.
        for name, shape in zip(input_names, input_shapes):
            # Default order is NCHW; only set NHWC if we're reasonably certain the model uses it.
            input_order = self.uff_order
            if not self.uff_order:
                input_order = trt.UffInputOrder.NCHW
                if FormatManager.determine_format(shape) == DataFormat.NHWC:
                    input_order = trt.UffInputOrder.NHWC
            shape = shape[1:]  # Drop the batch dimension; UFF inputs are registered without it.
            G_LOGGER.verbose(
                "Registering UFF input: {:} with shape: {:} and input order: {:}"
                .format(name, shape, input_order))
            parser.register_input(name, shape, input_order)

        if output_names and output_names != constants.MARK_ALL:
            for name in output_names:
                G_LOGGER.verbose("Registering UFF output: " + str(name))
                parser.register_output(name)

        G_LOGGER.info(
            "Parsing UFF model with inputs: {:} and outputs: {:}".format(
                input_names, output_names))
        success = parser.parse_buffer(uff_model, network)
        if not success:
            G_LOGGER.critical("Could not parse UFF correctly")
        return builder, network, parser, input_shapes[0][0]  # First dim of the first input shape is the batch size.
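
For context, a minimal sketch of how a loader like this might be consumed to build an engine. The `network_loader` argument and workspace size are assumptions, not part of the example above; the calls follow the implicit-batch TensorRT API used throughout these examples.

    # Hypothetical consumer of the (builder, network, parser, batch_size) tuple.
    def build_engine(network_loader, workspace_size=1 << 24):
        builder, network, parser, batch_size = network_loader()
        with builder, network, parser, builder.create_builder_config() as config:
            builder.max_batch_size = batch_size or 1
            config.max_workspace_size = workspace_size
            return builder.build_engine(network, config)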
Example #2
    def call_impl(self):
        with util.FreeOnException(
                create_network(
                    explicit_precision=self.explicit_precision,
                    explicit_batch=self.explicit_batch)) as (builder, network):
            # FreeOnException frees the builder and network if an exception escapes this block.
            parser = trt.OnnxParser(network, trt_util.get_trt_logger())
            return builder, network, parser
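
A hedged sketch of how the returned parser might be fed a model; `model_path` is a hypothetical argument, and the error handling follows the trt.OnnxParser API, which records errors rather than raising them.

    # Hypothetical follow-up: parse an ONNX file into the network created above.
    def parse_onnx_model(network_loader, model_path):
        builder, network, parser = network_loader()
        with open(model_path, "rb") as f:
            if not parser.parse(f.read()):
                for i in range(parser.num_errors):
                    G_LOGGER.error(parser.get_error(i))
                G_LOGGER.critical("Failed to parse ONNX model")
        return builder, network, parser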
Example #3
    def call_impl(self):
        """
        Returns:
            trt.ICudaEngine: The deserialized engine.
        """
        buffer, owns_buffer = util.invoke_if_callable(self._serialized_engine)

        trt.init_libnvinfer_plugins(trt_util.get_trt_logger(), "")
        with contextlib.ExitStack() as stack, trt.Runtime(trt_util.get_trt_logger()) as runtime:
            if owns_buffer:
                try:
                    buffer.__enter__  # Check for context-manager support; IHostMemory is freed only in __exit__.
                except AttributeError:
                    pass
                else:
                    stack.enter_context(buffer)

            engine = runtime.deserialize_cuda_engine(buffer)
            if not engine:
                G_LOGGER.critical("Could not deserialize engine. See log for details.")
            return engine
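
A round-trip sketch under the same API: serializing a built engine to disk and handing the bytes back to trt.Runtime, just as the example above does for in-memory buffers. Here `engine` is assumed to be a built trt.ICudaEngine.

    # Minimal save/load round trip for a serialized engine.
    def save_engine(engine, path):
        with open(path, "wb") as f:
            f.write(engine.serialize())

    def load_engine(path):
        with open(path, "rb") as f, trt.Runtime(trt_util.get_trt_logger()) as runtime:
            return runtime.deserialize_cuda_engine(f.read())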
Example #4
    def __call__(self):
        builder = trt.Builder(get_trt_logger())
        network = builder.create_network()
        parser = trt.CaffeParser()

        parser.parse(deploy=self.deploy,
                     model=self.model,
                     network=network,
                     dtype=self.dtype)

        if self.outputs and self.outputs != constants.MARK_ALL:
            trt_util.mark_outputs(network, self.outputs)

        return builder, network, parser, self.batch_size
Example #5
    def call_impl(self):
        """
        Returns:
            (trt.Builder, trt.INetworkDefinition): The builder and empty network.
        """
        with util.FreeOnException([trt.Builder(trt_util.get_trt_logger())]) as (builder, ):
            network_flags = 0
            if self.explicit_batch:
                network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            if self.explicit_precision:
                network_flags |= 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)
            network = builder.create_network(flags=network_flags)
            if network is None:
                G_LOGGER.critical("Invalid network. See logging output above for details.")
            return builder, network
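
The bit shifts exist because trt.NetworkDefinitionCreationFlag members are bit positions rather than masks. A minimal sketch of the same arithmetic in isolation; passing flags=0 (the default) yields an implicit-batch network instead.

    # Build the bitmask create_network() expects from the flag's bit position.
    explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(trt_util.get_trt_logger()) as builder:
        network = builder.create_network(flags=explicit_batch_flag)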
Example #6
    def __call__(self):
        builder = trt.Builder(get_trt_logger())
        network = builder.create_network()
        parser = trt.CaffeParser()

        # parse() returns an IBlobNameToTensor table mapping Caffe blob names to network tensors.
        model_tensors = parser.parse(deploy=self.deploy,
                                     model=self.model,
                                     network=network,
                                     dtype=self.dtype)

        if self.outputs and self.outputs != constants.MARK_ALL:
            for output in self.outputs:
                network.mark_output(model_tensors.find(output))

        return builder, network, parser, self.batch_size
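
Examples #4 and #6 differ only in how outputs are marked: #4 delegates to the Polygraphy helper trt_util.mark_outputs, while #6 uses the IBlobNameToTensor table returned by trt.CaffeParser.parse to look up each tensor by blob name and mark it directly with network.mark_output.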
Example #7
    def activate_impl(self):
        """
        Vars:
            engine (trt.ICudaEngine):
                    The engine tracked by this runner. The TrtLegacyRunner OWNS the engine it
                    manages, and is therefore responsible for its destruction. Do not free the
                    engine outside of the runner, or it will result in a double free.
            context (trt.IExecutionContext): The context used for inference.
            input_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for input buffers.
            output_buffers (Dict[str, TrtLegacyRunner.HostDeviceMem]):
                    A mapping of binding names to HostDeviceMem objects for output buffers.
            bindings (List[int]): A list of device pointers for engine bindings.
            stream (cuda.Stream): The CUDA stream that this runner will use for inference.
        """

        # Only initialize GPU after this runner is activated.
        # Allocates all buffers required for an engine, i.e. host/device input_buffers/output_buffers.
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceArray(shape=shape,
                                              dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:35} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream

        # If an engine file was provided, deserialize it; otherwise, build one from the network loader.
        if self.load_engine:
            with open(self.load_engine,
                      "rb") as f, trt.Runtime(get_trt_logger()) as runtime:
                G_LOGGER.info("Reading engine from {:}".format(
                    self.load_engine))
                self.engine = runtime.deserialize_cuda_engine(f.read())
        else:
            trt.init_libnvinfer_plugins(get_trt_logger(), "")
            builder, network, parser, model_batch_size = self.network_loader()
            with builder, network, parser, builder.create_builder_config(
            ) as config:
                builder.max_batch_size = int(self.max_batch_size
                                             or model_batch_size or 1)

                config.max_workspace_size = int(self.max_workspace_size)

                if not self.tf32:
                    with contextlib.suppress(AttributeError):
                        config.clear_flag(trt.BuilderFlag.TF32)
                if self.fp16:
                    # set_flag preserves flags already set above; direct assignment to config.flags would clobber them.
                    config.set_flag(trt.BuilderFlag.FP16)

                if not network:
                    G_LOGGER.critical("Invalid network")
                G_LOGGER.super_verbose(lambda: trt_util.str_from_network(
                    network) or "Finished logging network")

                if self.layerwise:
                    # In layerwise mode, every layer becomes an output.
                    G_LOGGER.info(
                        "Running in layerwise mode. Marking {:} layers as outputs"
                        .format(network.num_layers))
                    for layer in network:
                        for index in range(layer.num_outputs):
                            out = layer.get_output(index)
                            if not out.is_network_output:
                                network.mark_output(out)

                G_LOGGER.info(
                    "Building engine: max workspace size={:} bytes, max batch size={:}, fp16={:}, "
                    "tf32={:}".format(config.max_workspace_size,
                                      builder.max_batch_size, self.fp16,
                                      self.tf32))
                self.engine = builder.build_engine(network, config)

        if not self.engine:
            G_LOGGER.critical(
                "Invalid Engine. Please ensure the engine was built correctly")

        if self.engine_path:
            with open(self.engine_path, "wb") as f:
                G_LOGGER.info("Writing engine to {:}".format(self.engine_path))
                f.write(self.engine.serialize())

        self.context = self.engine.create_execution_context()
        self.input_buffers, self.output_buffers, self.stream = allocate_buffers(
            self.engine)
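
For context, a hedged sketch of the inference step that typically follows this activation, reusing the buffers allocated above. The copy_from/copy_to and .ptr members mirror Polygraphy's CUDA wrappers as used in the example, but their exact signatures are assumptions here, as is the `runner` parameter standing in for the TrtLegacyRunner instance.

    import numpy as np

    # Hypothetical inference helper for the buffers set up in activate_impl.
    def infer(runner, feed_dict):
        bindings = []
        for binding in runner.engine:
            if runner.engine.binding_is_input(binding):
                buf = runner.input_buffers[binding]
                # Copy the host array for this input onto the device.
                buf.device.copy_from(np.ascontiguousarray(feed_dict[binding]), runner.stream)
            else:
                buf = runner.output_buffers[binding]
            bindings.append(buf.device.ptr)

        # Implicit-batch execution, matching the engine built above.
        runner.context.execute_async(batch_size=runner.engine.max_batch_size,
                                     bindings=bindings,
                                     stream_handle=runner.stream.ptr)

        # Copy outputs back to the host, then wait for the stream to drain.
        for buf in runner.output_buffers.values():
            buf.device.copy_to(buf.host, runner.stream)
        runner.stream.synchronize()
        return {name: buf.host for name, buf in runner.output_buffers.items()}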