    def infer(self, feed_dict, output):
        # For each binding name, collect the concrete array(s) that determine its shape:
        # either the user-supplied input from feed_dict or the reference PyTorch output(s).
        for name in self.engine:
            if name in feed_dict:
                in_out = [feed_dict[name]]
            elif isinstance(output, tuple):
                in_out = [
                    output[i].detach().cpu().numpy()
                    for i in range(len(output))
                ]
            else:
                in_out = [output.detach().cpu().numpy()]

            binding = self.engine[name]

            # Only set shapes if required
            for i in range(len(in_out)):
                shape = in_out[i].shape
                if self.engine.is_shape_binding(binding) and is_shape_dynamic(
                        self.context.get_shape(binding)):
                    logging.debug(
                        "Setting shape binding: {:} (index: {:}) to: {:}".
                        format(name, binding, in_out[i]))
                    self.context.set_shape_input(binding, in_out[i])
                elif is_shape_dynamic(self.context.get_binding_shape(binding)):
                    logging.debug(
                        "Setting binding: {:} (index: {:}) to shape: {:}".
                        format(name, binding, shape))
                    self.context.set_binding_shape(binding, shape)

        # Check that all dynamic binding shapes and shape-tensor inputs have been specified.
        if not self.context.all_binding_shapes_specified:
            logging.critical(
                "Some input shapes were not specified.\nNote: Inputs are: {:}".
                format(self.get_input_metadata()))
        if not self.context.all_shape_inputs_specified:
            logging.critical(
                "Some shape inputs were not specified.\nNote: Inputs are: {:}".
                format(self.get_input_metadata()))

        bindings_per_profile = self.engine.num_bindings // self.engine.num_optimization_profiles
        start_binding = self.context.active_optimization_profile * bindings_per_profile
        end_binding = start_binding + bindings_per_profile

        # Resize buffers so they are the appropriate size.
        for binding in range(start_binding, end_binding):
            shape = tuple(self.context.get_binding_shape(binding))
            self.buffers.resize(self.engine[binding], shape)

        bindings = self.buffers.get_bindings()

        start = time.perf_counter()
        self.buffers.copy_inputs(feed_dict, self.stream)
        self.context.execute_async_v2(bindings=bindings,
                                      stream_handle=self.stream.handle)
        self.buffers.copy_outputs(self.stream)
        self.stream.synchronize()
        end = time.perf_counter()

        self.inference_time = end - start
        return self.buffers.get_outputs()
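For context, a hedged usage sketch of this method's contract; the binding name, shapes, and the runner/output objects are hypothetical placeholders rather than values from the original code:

import numpy as np

# Hypothetical: `runner` wraps an engine with a single input binding named "input_ids",
# and `torch_out` is the reference PyTorch output passed through as `output`.
feed_dict = {"input_ids": np.zeros((1, 128), dtype=np.int32)}
# out_dict = runner.infer(feed_dict=feed_dict, output=torch_out)
# `out_dict` maps output binding names to host numpy buffers, and
# `runner.inference_time` holds the measured wall-clock time of the run.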
Code example #2
    def __call__(self):
        class DummyContextManager(object):
            def __enter__(self):
                return None

            def __exit__(self, exc_type, exc_value, traceback):
                return None

        network_parser = self.network_loader()
        try:
            network, parser = network_parser
            assert isinstance(network, trt.INetworkDefinition)
        except (ValueError, AssertionError):
            network = network_parser
            parser = DummyContextManager()

        with trt.Builder(TRT_LOGGER) as builder, network, parser:
            if self.preprocess_network:
                logging.debug("Applying network preprocessing: {:}".format(
                    self.preprocess_network))
                self.preprocess_network(network)

            if self.layerwise:
                TensorRTRunnerV2.mark_layerwise(network)

            if logging.getEffectiveLevel() <= logging.DEBUG:
                TensorRTRunnerV2.log_network(network)

            config = builder.create_builder_config()
            profile = TensorRTRunnerV2.build_profile(builder, network,
                                                     self.profile_shapes)
            config.add_optimization_profile(profile)

            config.max_workspace_size = int(self.max_workspace_size)
            if self.fp16_mode:
                config.flags = 1 << int(trt.BuilderFlag.FP16)
            if self.int8_mode:
                config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8)
                if not network.has_explicit_precision:
                    if not self.calibrator:
                        logging.critical(
                            "Network does not have explicit precision. A calibrator must be provided in order to use int8 mode."
                        )
                    self.calibrator.set_input_metadata(
                        get_input_metadata_from_profile(profile, network))
                    config.int8_calibrator = self.calibrator

            logging.debug("Using builder configuration flags: {:}".format(
                config.flags))
            logging.info(
                "Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}"
                .format(self.max_workspace_size, self.fp16_mode,
                        self.int8_mode, self.layerwise))
            engine = builder.build_engine(network, config)
            self.written_engine_path = write_timestamped(
                contents=lambda: engine.serialize(),
                dir=self.write_engine,
                name="tensorrt_runner_v2.engine")
            return engine
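As an aside, a minimal sketch of the bitmask arithmetic used for the builder flags above; the bit positions are illustrative stand-ins, not TensorRT's real BuilderFlag values:

# Each flag occupies one bit position; flags are combined with bitwise OR.
FP16_BIT, INT8_BIT = 0, 1                  # hypothetical bit positions
flags = 1 << FP16_BIT                      # fp16_mode: set the FP16 bit
flags = flags | (1 << INT8_BIT)            # int8_mode: OR in the INT8 bit, keeping FP16
assert flags & (1 << FP16_BIT) and flags & (1 << INT8_BIT)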
    def get_profile_shape(name):
        if name not in profile_shapes:
            return None
        shapes = profile_shapes[name]
        if not isinstance(shapes, list) or len(shapes) != 3:
            logging.critical(
                "Profile values must be a list containing exactly 3 shapes (tuples or Dims), but received shapes: {:} for input: {:}.\nNote: profile was: {:}.\nNote: Network inputs were: {:}"
                .format(shapes, name, profile_shapes,
                        TensorRTRunnerV2.get_network_inputs(network)))
        return shapes
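To make the expected structure concrete, a hedged example of a profile_shapes mapping that would pass this check; the input name and dimensions are hypothetical:

from collections import OrderedDict

profile_shapes = OrderedDict()
# Exactly three shapes per input: (min, opt, max).
profile_shapes["input_ids"] = [(1, 128), (8, 128), (32, 128)]
assert isinstance(profile_shapes["input_ids"], list) and len(profile_shapes["input_ids"]) == 3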
Code example #4
    def __call__(self):
        network = TensorRTRunnerV2.create_network(
            explicit_precision=self.explicit_precision)

        parser = trt.OnnxParser(network, TRT_LOGGER)
        success = parser.parse(self.onnx_loader().SerializeToString())
        if not success:
            for index in range(parser.num_errors):
                logging.error(parser.get_error(index))
            logging.critical("Could not parse ONNX correctly")

        return network, parser
    def create_network(explicit_batch=True, explicit_precision=False):
        with trt.Builder(TRT_LOGGER) as builder:
            network_flags = 0
            if explicit_batch:
                network_flags = 1 << int(
                    trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            if explicit_precision:
                network_flags = network_flags | (1 << int(
                    trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))
            network = builder.create_network(flags=network_flags)
            if network is None:
                logging.critical("Invalid network")
            return network
    def __init__(self, model_loader=None, plugins=None, name=None):
        """
        Creates a runner that manages a single TensorRT engine.

        Args:
            model_loader (Callable() -> trt.ICudaEngine): A callable that can supply a TensorRT engine.

        Optional Args:
            plugins (List[str]): A list of paths to plugin libraries to load before inference.
            name (str): The human-readable name to use for this runner.
        """
        set_trt_logging_level(logging.getEffectiveLevel())

        def load_plugins():
            import ctypes

            for plugin in plugins:
                path = os.path.abspath(plugin)
                logging.info("Loading plugin library: {:}".format(path))
                ctypes.CDLL(path)

        # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization.
        if plugins:
            load_plugins()

        # Choose a unique name for this runner.
        super().__init__(
            default_value(
                name,
                "trt-v2-runner-{:}".format(TensorRTRunnerV2.total_runners)))
        TensorRTRunnerV2.total_runners += 1
        logging.debug("Creating {:}".format(self.name))

        self.model_loader = model_loader

        self.engine = self.model_loader()
        if not self.engine:
            logging.critical(
                "Invalid Engine. Please ensure the engine was built correctly."
            )

        self.buffers = Buffers.from_engine(self.engine)
        self.stream = cuda.Stream()

        self.context = self.engine.create_execution_context()
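A minimal sketch of the plugin-loading step performed above, plus a hedged construction call; the library path and loader name are placeholders:

import ctypes
import os

plugins = ["./libcustom_plugins.so"]           # hypothetical plugin library path
for plugin in plugins:
    path = os.path.abspath(plugin)
    if os.path.exists(path):                   # guard so the sketch is safe to run anywhere
        ctypes.CDLL(path)                      # must happen before engine deserialization

# runner = TensorRTRunnerV2(model_loader=build_engine,   # callable returning a trt.ICudaEngine
#                           plugins=plugins,
#                           name="my-trt-runner")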
Code example #7
File: test_deploy_export.py Project: rlanka4/NeMo
    def __test_export_route(self, module, out_name, mode, input_example=None):
        # select correct extension based on the output format
        ext = {
            DF.ONNX: ".onnx",
            DF.TRTONNX: ".trt.onnx",
            DF.PYTORCH: ".pt",
            DF.TORCHSCRIPT: ".ts"
        }.get(mode, ".onnx")
        out = Path(f"{out_name}{ext}")
        out_name = str(out)

        if out.exists():
            os.remove(out)

        module.eval()
        outputs_fwd = (module.forward(*tuple(input_example.values()))
                       if isinstance(input_example, OrderedDict) else
                       (module.forward(
                           *input_example) if isinstance(input_example, tuple)
                        else module.forward(input_example)
                        if input_example is not None else None))

        deploy_input_example = (tuple(input_example.values()) if isinstance(
            input_example, OrderedDict) else input_example)
        self.nf.deployment_export(
            module=module,
            output=out_name,
            input_example=deploy_input_example,
            d_format=mode,
            output_example=outputs_fwd,
        )

        tol = 5.0e-3
        assert out.exists()

        if mode == DF.TRTONNX:

            data_loader = DefaultDataLoader()
            loader_cache = DataLoaderCache(data_loader)
            profile_shapes = OrderedDict()
            names = list(module.input_ports) + list(module.output_ports)
            names = list(
                filter(
                    lambda x: x not in
                    (module._disabled_deployment_input_ports | module.
                     _disabled_deployment_output_ports),
                    names,
                ))
            if isinstance(input_example, tuple):
                si = [
                    tuple(input_example[i].shape)
                    for i in range(len(input_example))
                ]
            elif isinstance(input_example, OrderedDict):
                si = [
                    tuple(input_example.values())[i].shape
                    for i in range(len(input_example))
                ]
            else:
                si = [tuple(input_example.shape)]
            if isinstance(outputs_fwd, tuple):
                fi = [
                    tuple(outputs_fwd[i].shape)
                    for i in range(len(outputs_fwd))
                ]
            else:
                fi = [tuple(outputs_fwd.shape)]
            si = si + fi
            i = 0
            for name in names:
                profile_shapes[name] = [si[i]] * 3
                i = i + 1

            onnx_loader = OnnxFileLoader(out_name)
            network_loader = OnnxNetworkLoader(onnx_loader,
                                               explicit_precision=False)
            model_loader = BuildEngineLoader(
                network_loader,
                max_workspace_size=1 << 30,
                fp16_mode=False,
                int8_mode=False,
                profile_shapes=profile_shapes,
                write_engine=None,
                calibrator=None,
                layerwise=False,
            )

            with TensorRTRunnerV2(model_loader=model_loader) as active_runner:
                input_metadata = active_runner.get_input_metadata()
                if input_metadata is None:
                    logging.critical(
                        "For {:}, get_input_metadata() returned None!".format(
                            active_runner.name))
                logging.debug("Runner Inputs: {:}".format(input_metadata))
                feed_dict = loader_cache.load(iteration=0,
                                              input_metadata=input_metadata,
                                              input_example=input_example)
                inputs = dict()
                input_names = list(input_metadata.keys())
                for i in range(len(input_names)):
                    input_name = input_names[i]
                    if input_name in module._disabled_deployment_input_ports:
                        continue
                    inputs[input_name] = (
                        input_example[input_name].cpu().numpy() if isinstance(
                            input_example, OrderedDict) else
                        (input_example[i].cpu().numpy() if isinstance(
                            input_example, tuple) else
                         input_example.cpu().numpy()))

                out_dict = active_runner.infer(feed_dict=feed_dict,
                                               output=outputs_fwd)
                for ov in out_dict.values():
                    outputs_scr = torch.from_numpy(ov).cuda()
                    break

                outputs = []
                outputs.append(copy.deepcopy(out_dict))
                logging.debug("Received outputs: {:}".format([
                    "{:}: {:}".format(name, out.shape)
                    for name, out in out_dict.items()
                ]))
                logging.info("Output Buffers: {:}".format(outputs))

            inpex = []
            for ie in feed_dict.values():  # loader_cache.cache[0].values():
                if ie.dtype.type is np.int32:
                    inpex.append(torch.from_numpy(ie).long().cuda())
                else:
                    inpex.append(torch.from_numpy(ie).cuda())
                if len(inpex) == len(input_example):
                    break
            inpex = tuple(inpex)
            outputs_fwd = module.forward(*inpex)

        elif mode == DF.ONNX:
            # Must recompute because *module* might be different now
            outputs_fwd = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))
            sess_options = ort.SessionOptions()
            sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
            ort_session = ort.InferenceSession(out_name, sess_options,
                                               ['CUDAExecutionProvider'])
            print('Execution Providers: ', ort_session.get_providers())
            inputs = dict()
            input_names = list(module.input_ports)
            ort_inputs = ort_session.get_inputs()
            for i in range(len(input_names)):
                input_name = input_names[i]
                if input_name in module._disabled_deployment_input_ports:
                    continue
                inputs[input_name] = (input_example[input_name].cpu().numpy()
                                      if isinstance(input_example,
                                                    OrderedDict) else
                                      (input_example[i].cpu().numpy()
                                       if isinstance(input_example, tuple) else
                                       input_example.cpu().numpy()))
            outputs_scr = ort_session.run(None, inputs)
            outputs_scr = torch.from_numpy(outputs_scr[0]).cuda()
        elif mode == DF.TORCHSCRIPT:
            scr = torch.jit.load(out_name)
            if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet):
                input_example = torch.randn(4, 1).cuda()
                outputs_fwd = module.forward(input_example)
            outputs_scr = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))
        elif mode == DF.PYTORCH:
            module.load_state_dict(torch.load(out_name))
            module.eval()
            outputs_scr = (
                module.forward(*tuple(input_example.values())) if isinstance(
                    input_example, OrderedDict) else
                (module.forward(*input_example) if isinstance(
                    input_example, tuple) else module.forward(input_example)))

        outputs_scr = (outputs_scr[0] if isinstance(outputs_scr, tuple)
                       or isinstance(outputs_scr, list) else outputs_scr)
        outputs_fwd = (outputs_fwd[0] if isinstance(outputs_fwd, tuple)
                       or isinstance(outputs_fwd, list) else outputs_fwd)

        assert (outputs_scr - outputs_fwd).norm(p=2) < tol

        if out.exists():
            os.remove(out)
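For reference, a small sketch of the final comparison this route performs; the tensor values are made up and assume torch is available:

import torch

tol = 5.0e-3
outputs_scr = torch.tensor([1.0000, 2.0000])   # e.g. exported-backend output
outputs_fwd = torch.tensor([1.0004, 1.9996])   # e.g. original module.forward output
assert (outputs_scr - outputs_fwd).norm(p=2) < tol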
    def build_profile(builder,
                      network,
                      profile_shapes,
                      default_shape_value=DEFAULT_SHAPE_VALUE):
        def override_shape(shape):
            return tuple([
                DEFAULT_SHAPE_VALUE if is_dimension_dynamic(dim) else dim
                for dim in shape
            ])

        def get_profile_shape(name):
            if name not in profile_shapes:
                return None
            shapes = profile_shapes[name]
            if not isinstance(shapes, list) or len(shapes) != 3:
                logging.critical(
                    "Profile values must be a list containing exactly 3 shapes (tuples or Dims), but received shapes: {:} for input: {:}.\nNote: profile was: {:}.\nNote: Network inputs were: {:}"
                    .format(shapes, name, profile_shapes,
                            TensorRTRunnerV2.get_network_inputs(network)))
            return shapes

        profile = builder.create_optimization_profile()
        for idx in range(network.num_inputs):
            inp = network.get_input(idx)

            if inp.is_shape_tensor:
                shapes = get_profile_shape(inp.name)
                if not shapes:
                    rank = inp.shape[0]
                    shapes = [(DEFAULT_SHAPE_VALUE, ) * rank] * 3
                    logging.warning(
                        "Setting shape input to {:}. If this is incorrect, for shape input: {:}, please provide tuples for min, opt, and max shapes containing {:} elements"
                        .format(shapes[0], inp.name, rank),
                        mode=logging_mode.ONCE,
                    )
                min, opt, max = shapes
                profile.set_shape(inp.name, min, opt, max)
                inp.shape = opt
                logging.info(
                    "Setting shape input: {:} values to min: {:}, opt: {:}, max: {:}"
                    .format(inp.name, min, opt, max))
            else:
                shapes = get_profile_shape(inp.name)
                if not shapes:
                    shapes = [override_shape(inp.shape)] * 3
                    logging.warning(
                        "Overriding input shape {:} to {:}. If this is incorrect, for input tensor: {:}, please provide tuples for min, opt, and max shapes containing values: {:} with dynamic dimensions replaced."
                        .format(inp.shape, shapes[0], inp.name, inp.shape),
                        mode=logging_mode.ONCE,
                    )
                min, opt, max = shapes
                profile.set_shape(inp.name, min, opt, max)
                inp.shape = opt
                logging.info(
                    "Setting input: {:} shape to min: {:}, opt: {:}, max: {:}".
                    format(inp.name, min, opt, max))

        if not profile:
            logging.critical(
                "Profile is not valid, please provide profile data. Note: profile was: {:}"
                .format(profile_shapes))
        return profile
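To illustrate the override_shape helper above, a minimal sketch assuming -1 marks a dynamic dimension and using a placeholder DEFAULT_SHAPE_VALUE:

DEFAULT_SHAPE_VALUE = 1                        # placeholder value for this sketch

def is_dimension_dynamic(dim):
    return dim is None or dim < 0              # assumed convention for dynamic dims

shape = (-1, 3, 224, 224)                      # hypothetical input with a dynamic batch dim
overridden = tuple(DEFAULT_SHAPE_VALUE if is_dimension_dynamic(d) else d for d in shape)
assert overridden == (1, 3, 224, 224)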
Code example #9
    def __call__(self, index, input_metadata, input_example=None):
        logging.debug("Updating seed to: {:}".format(self.seed + index))
        rng = np.random.RandomState(self.seed + index)

        buffers = OrderedDict()
        i = 0
        for name, (dtype, shape) in input_metadata.items():
            if input_example is not None and (not isinstance(
                    input_example, tuple) or i < len(input_example)):
                if isinstance(input_example, tuple):
                    static_shape = input_example[i].shape
                elif isinstance(input_example, OrderedDict):
                    static_shape = tuple(input_example.values())[i].shape
                else:
                    static_shape = [tuple(input_example.shape)]
            elif is_shape_dynamic(shape):
                if name in self.default_shapes:
                    static_shape = self.default_shapes[name]
                else:
                    static_shape = [
                        self.default_shape_value
                        if is_dimension_dynamic(elem) else elem
                        for elem in shape
                    ]
                if static_shape != shape:
                    if not is_valid_shape_override(static_shape, shape):
                        logging.critical(
                            "Cannot override original shape: {:}, for input: {:} to {:}"
                            .format(shape, name, static_shape))
                    logging.warning(
                        "Input: {:}: Adjusted dynamic shape: {:} to: {:}".
                        format(name, shape, static_shape),
                        mode=logging_mode.ONCE,
                    )
            else:
                if name in self.default_shapes:
                    logging.warning(
                        "Will not override static shape: {:}, for input: {:}".
                        format(shape, name),
                        mode=logging_mode.ONCE,
                    )
                static_shape = shape

            if input_example is not None and (not isinstance(
                    input_example, tuple) or i < len(input_example)):
                if isinstance(input_example, OrderedDict):
                    buffers[name] = list(input_example.values())[i].cpu()
                else:
                    buffers[name] = input_example[i].cpu() if isinstance(
                        input_example, tuple) else input_example.cpu()
            elif np.issubdtype(dtype, np.integer):
                buffers[name] = rng.randint(low=self.int_min,
                                            high=self.int_max,
                                            size=static_shape,
                                            dtype=dtype)
            elif np.issubdtype(dtype, np.bool_):
                buffers[name] = rng.randint(low=0, high=2,
                                            size=static_shape).astype(dtype)
            else:
                buffers[name] = (rng.random_sample(size=static_shape) *
                                 (self.float_max - self.float_min) +
                                 self.float_min).astype(dtype)

            buffers[name] = np.array(
                buffers[name]
            )  # To handle scalars. The above functions return a float if shape is ().

            # If the shape is 1D and has a length equal to the rank of the provided default shape, it is
            # likely a TRT shape tensor, and so should be overridden such that its value (not shape) is the default shape.
            is_shape_tensor = ((not is_shape_dynamic(shape))
                               and (name in self.default_shapes)
                               and (len(shape) == 1) and
                               (shape[0] == len(self.default_shapes[name])))
            if is_shape_tensor:
                buffers[name] = np.array(self.default_shapes[name],
                                         dtype=dtype)
                logging.warning(
                    "Assuming {:} is a shape tensor. Setting to: {:}".format(
                        name, buffers[name]),
                    mode=logging_mode.ONCE,
                )
            i = i + 1

        return buffers
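A self-contained sketch of the per-dtype buffer generation above; the input names, shapes, and value ranges are hypothetical:

import numpy as np
from collections import OrderedDict

rng = np.random.RandomState(0)
input_metadata = OrderedDict([("ids", (np.int32, (2, 4))), ("scores", (np.float32, (2, 4)))])
buffers = OrderedDict()
for name, (dtype, shape) in input_metadata.items():
    if np.issubdtype(dtype, np.integer):
        buffers[name] = rng.randint(low=0, high=25, size=shape, dtype=dtype)
    elif np.issubdtype(dtype, np.bool_):
        buffers[name] = rng.randint(low=0, high=2, size=shape).astype(dtype)
    else:
        buffers[name] = rng.random_sample(size=shape).astype(dtype)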
Code example #10
    def load(self, iteration, input_metadata, input_example=None):
        """
        Load the specified iteration from the cache if present, or generate using the data loader.

        Args:
            iteration (int): The iteration whose data to retrieve.
            input_metadata (OrderedDict[str, Tuple[np.dtype, Tuple[int]]]): Input Metadata, including shape and type information. The loader may attempt to match input_metadata when data in the cache does not exactly match a new set of input_metadata.
            input_example: An optional example input that is forwarded to the data loader when new buffers need to be generated.
        """
        if iteration not in self.cache:
            logging.debug(
                "Iteration {:} not found in cache, generating new buffers for all inputs"
                .format(iteration))
            self.cache[iteration] = self.data_loader(iteration, input_metadata,
                                                     input_example)
            if self.cache[iteration] is None:
                logging.critical(
                    "Received no data from data_loader(iteration, input_metadata) for input_metadata: {:}"
                    .format(input_metadata))
        else:
            logging.info("Found iteration {:} in cache".format(iteration))

        feed_dict = OrderedDict()
        for index, (name, (dtype, shape)) in enumerate(input_metadata.items()):
            cached_name = find_in_dict(name, self.cache[iteration], index)
            if cached_name is None:
                logging.warning(
                    "Could not find input: {:} in cache, regenerating buffers".
                    format(name))
                self.cache[iteration] = self.data_loader(
                    iteration, input_metadata, input_example)
                cached_name = name

            buffer = self.cache[iteration][cached_name]

            if dtype != buffer.dtype:
                logging.warning(
                    "Cached buffer data type does not match data type for input: {:}. Note: Cached type: {:}, input type: {:}. Attempting to cast"
                    .format(name, buffer.dtype, dtype))
                buffer = buffer.astype(dtype)

            if not is_valid_shape_override(buffer.shape, shape):
                logging.warning(
                    "Cached buffer shape does not match shape for input. Note: Cached shape: {:}, input shape: {:}."
                    .format(buffer.shape, shape))
                # Try to permute the shape to match
                try:
                    perm = FormatManager.permutation(
                        FormatManager.deduce_format(buffer.shape),
                        FormatManager.deduce_format(shape))
                    new_shape = FormatManager.convert(
                        tuple(buffer.shape),
                        FormatManager.deduce_format(shape))
                    logging.warning(
                        "Attempting to permute shape: {:} using permutation {:}. New shape: {:}"
                        .format(buffer.shape, perm, new_shape))
                    buffer = np.transpose(buffer, perm)
                except NotImplementedError as err:
                    # If the FormatManager does not recognize the format, skip permutation.
                    logging.info("Skipping permutation due to {:}".format(err))
                except KeyError as err:
                    # If the FormatManager cannot generate the permutation for the format combination, skip permutation.
                    logging.info("Skipping permutation due to {:}".format(err))

            feed_dict[name] = buffer
        return feed_dict
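Finally, a minimal sketch of the two cache-repair steps in load(): casting a cached buffer to the requested dtype and permuting its axes; the NHWC-to-NCHW permutation is an illustrative assumption rather than FormatManager's actual output:

import numpy as np

cached = np.zeros((1, 224, 224, 3), dtype=np.float64)   # e.g. cached NHWC float64 buffer
wanted_dtype, wanted_shape = np.float32, (1, 3, 224, 224)

buffer = cached.astype(wanted_dtype)                     # dtype mismatch -> cast
buffer = np.transpose(buffer, (0, 3, 1, 2))              # shape mismatch -> permute axes
assert buffer.dtype == wanted_dtype and buffer.shape == wanted_shape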