Example #1
def build_engine(onnx_file_path, engine_file_path):
    with trt.Builder(TRT_LOGGER) as builder:
        builder.max_workspace_size = 1 << 20
        builder.max_batch_size = 1

        with builder.create_network() as network:
            with trt.OnnxParser(network, TRT_LOGGER) as parser:

                # parser.parse returns False on failure; check it so a bad
                # model does not surface later as a crash on engine.serialize().
                with open(onnx_file_path, 'rb') as model:
                    if not parser.parse(model.read()):
                        for i in range(parser.num_errors):
                            print(parser.get_error(i))
                        return None

                engine = builder.build_cuda_engine(network)

                with open(engine_file_path, 'wb') as f:
                    f.write(engine.serialize())
                return engine
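Note that most of the examples on this page use the pre-8.0 builder API: builder.max_workspace_size, builder.max_batch_size, builder.fp16_mode and build_cuda_engine were deprecated in TensorRT 7 and removed in TensorRT 8. A minimal sketch of the same ONNX-to-engine flow on the TensorRT 8.x API (assuming TensorRT 8.4+ for set_memory_pool_limit; the paths and the 1 GiB workspace limit are illustrative):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine_trt8(onnx_file_path, engine_file_path):
    builder = trt.Builder(TRT_LOGGER)
    # ONNX parsing requires an explicit-batch network
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)

    # parse_from_file reports failures through parser.get_error()
    if not parser.parse_from_file(onnx_file_path):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        return None

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)

    # build_serialized_network replaces build_cuda_engine + engine.serialize()
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        return None
    with open(engine_file_path, 'wb') as f:
        f.write(serialized_engine)
    return serialized_engine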
Example #2
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
    ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        #         print(builder.platform_has_fast_fp16)
        builder.fp16_mode = True
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            # parser.parse returns a bool; the original code did not check it.
            if not parser.parse(model.read()):
                print(parser.get_error(0))
                return None
            # Debugging aids kept from the original:
            # print(network.get_layer(network.num_layers -1).get_output(0).shape)
            # network.mark_output(network.get_layer(network.num_layers -1).get_output(0))
        return builder.build_cuda_engine(network)
Example #3
def build_engine(onnx_path, shape=[1,3,512,512]):

    """
    This is the function to create the TensorRT engine
    Args:
        onnx_path : Path to onnx_file. 
        shape : Shape of the input of the ONNX file. 
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(1) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = (256 << 20)
        with open(onnx_path, 'rb') as model:
            parser.parse(model.read())
        network.get_input(0).shape = shape
        engine = builder.build_cuda_engine(network)
        return engine
Example #4
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network() as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser:
        """
        Needed, otherwise:
        [TensorRT] ERROR: Internal error: could not find any implementation for node (Unnamed Layer* 34) [Matrix Multiply], try increasing the workspace size with IBuilder::setMaxWorkspaceSize()
        [TensorRT] ERROR: ../builder/tacticOptimizer.cpp (1461) - OutOfMemory Error in computeCosts: 0
        """
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = 1
        with open(model_path, "rb") as f:
            parser.parse(f.read())
        engine = builder.build_cuda_engine(network)
        return engine
Example #5
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        builder.fp16_mode = True
        builder.strict_type_constraints = True
        return builder.build_cuda_engine(network)
Example #6
def build_engine_onnx(model_file):
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    config = builder.create_builder_config()
    parser = trt.OnnxParser(network, TRT_LOGGER)

    config.max_workspace_size = GiB(1)
    # Load the Onnx model and parse it in order to populate the TensorRT network.
    with open(model_file, "rb") as model:
        if not parser.parse(model.read()):
            print("ERROR: Failed to parse the ONNX file.")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None
    return builder.build_engine(network, config)
Example #7
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(cfg.TRT_LOGGER) as builder, builder.create_network(
        ) as network, trt.OnnxParser(network, cfg.TRT_LOGGER) as parser:
            builder.max_workspace_size = GiB(args.model_memory)
            builder.max_batch_size = args.max_batch_size

            if args.precision == 'fp16':
                # set to fp16
                print('force to fp16')
                builder.fp16_mode = True
                builder.strict_type_constraints = True
            elif args.precision == 'int8':
                # set to int8
                builder.int8_mode = True
                '''
                NUM_IMAGES_PER_BATCH = 5
                batchstream = ImageBatchStream(NUM_IMAGES_PER_BATCH, calibration_files)
                int8_calibrator = EntropyCalibrator(['input_node_name'], batchstream)
                builder.int8_calibrator = int8_calibrator
                '''
            else:
                pass

            # Parse model file
            if not os.path.exists(cfg.onnx_file_path):
                print(
                    'ONNX file {} not found, please run pytorch2ONNX.py first to generate it.'
                    .format(cfg.onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(
                cfg.onnx_file_path))
            with open(cfg.onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(cfg.onnx_file_path))

            print(network.num_layers)
            network.mark_output(
                network.get_layer(network.num_layers - 1).get_output(0))

            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(cfg.engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine
Example #8
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder,\
              builder.create_network() as network, \
              trt.OnnxParser(network, TRT_LOGGER) as parser:

            builder.max_workspace_size = FLAGS.vram * (1 << 30)  # FLAGS.vram GiB
            builder.max_batch_size = FLAGS.max_batch_size

            if FLAGS.precision == 'fp16':
                # set to fp16
                print('force to fp16')
                builder.fp16_mode = True
                builder.strict_type_constraints = True
            elif FLAGS.precision == 'int8':
                # set to int8

                pass
                # builder.int8_mode = True
                '''
                NUM_IMAGES_PER_BATCH = 5
                batchstream = ImageBatchStream(NUM_IMAGES_PER_BATCH, calibration_files)
                int8_calibrator = EntropyCalibrator(['input_node_name'], batchstream)
                builder.int8_calibrator = int8_calibrator
                '''
            else:
                pass

            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)

            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')

            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")

            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine
Example #9
    def _parser_model_onnx(self, model_info):
        g_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(g_logger) as builder, builder.create_network() as network, \
                trt.OnnxParser(network, g_logger) as parser:

            for i in model_info.inputs:
                if i.data_format == 'channels_last':
                    raise Exception('The data format: {} is not supported'.format(i.data_format))

            _LOGGER.info('model_to_plan:: Begin to parse network!')
            with open(self.model_path, 'rb') as model:
                result = parser.parse(model.read())
            if not result:
                raise Exception('model_to_plan:: Failed to parse network from ONNX file!')

            self._parser_model(builder=builder, network=network, model_info=model_info)
Example #10
 def __init__(self, model_path):
     TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
             common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                 network, TRT_LOGGER) as parser:
         builder.max_workspace_size = common.GiB(1)
         # Load the Onnx model and parse it in order to populate the TensorRT network.
         with open(model_path, 'rb') as model:
             if not parser.parse(model.read()):
                 print('ERROR: Failed to parse the ONNX file.')
                 for error in range(parser.num_errors):
                     print(parser.get_error(error))
         self.engine = builder.build_cuda_engine(network)
         self.context = self.engine.create_execution_context()
         self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(
             self.engine)
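Example #10 relies on common.allocate_buffers, which is not shown. A typical implementation, modeled on the common.py helper from NVIDIA's TensorRT samples (pre-8.5 binding-index API) and assuming static input shapes, looks roughly like this; treat it as an illustrative sketch, not the exact helper used:

import pycuda.driver as cuda
import pycuda.autoinit  # creates a CUDA context on import
import tensorrt as trt

class HostDeviceMem:
    def __init__(self, host_mem, device_mem):
        self.host = host_mem      # page-locked host array
        self.device = device_mem  # device allocation

def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:  # iterating an engine yields binding names
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream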
Example #11
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(flags = 1) as network, \
    trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 32
        builder.max_batch_size = 1
        builder.fp16_mode = True

        with open(model_path, 'rb') as f:
            value = parser.parse(f.read())
            print("Parser: ", value)

        engine = builder.build_cuda_engine(network)
        # print_network(network)

        print(engine)
        return engine
Example #12
def build_trt_engine(onnx_module, args):
    logger = trt.Logger()

    network_flags = 1 << (int)(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    with trt.Builder(logger) as builder, builder.create_network(
            network_flags) as network, trt.OnnxParser(network,
                                                      logger) as parser:
        builder.max_workspace_size = 2**31  # 2 GB
        builder.max_batch_size = args.batch_dim
        builder.fp16_mode = args.precision != 'fp32'
        if args.precision == 'int8':
            builder.int8_mode = True
            builder.int8_calibrator = Int8Calibrator(args)

        print('parsing ONNX...')
        onnx_buf = io.BytesIO()
        onnx.save(onnx_module, onnx_buf)
        onnx_buf.seek(0)
        if not parser.parse(onnx_buf.read()):
            print(parser.num_errors, 'parser errors:')
            for i in range(parser.num_errors):
                print(parser.get_error(i))

        print('inputs:')
        inputs = {
            t.name: t.shape
            for t in [network.get_input(i) for i in range(network.num_inputs)]
        }
        pprint(inputs)
        print('outputs:')
        outputs = {
            t.name: t.shape
            for t in
            [network.get_output(i) for i in range(network.num_outputs)]
        }
        pprint(outputs)

        print('building CUDA engine...')
        engine = builder.build_cuda_engine(network)
        if engine:
            print('saving CUDA engine to', args.trt_path)
            with open(args.trt_path, 'wb') as mf:
                mf.write(engine.serialize())

        return engine
Example #13
def onnx_to_trt(folder, model_name, fp=16):
    print('--- fp_{} ---'.format(fp))

    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    F = EXPLICIT_BATCH

    NUM_IMAGES_PER_BATCH = 1

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            F) as network, trt.OnnxParser(
                network, TRT_LOGGER) as parser, builder.create_builder_config(
                ) as config:

        builder.max_batch_size = NUM_IMAGES_PER_BATCH
        builder.max_workspace_size = 1 << 30
        if fp == 16:
            builder.fp16_mode = True
        builder.strict_type_constraints = True

        config.max_workspace_size = 1 << 30
        if fp == 16:
            config.flags |= 1 << int(trt.BuilderFlag.FP16)

        config.flags |= 1 << int(trt.BuilderFlag.STRICT_TYPES)

        with open("./{}/{}.onnx".format(folder, model_name), 'rb') as model:
            PARSED = parser.parse(model.read())
            if not PARSED:
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None  # bail out: no engine exists to serialize

            for i in network:
                print(i.name)

            inputs = [
                network.get_input(i) for i in range(network.num_inputs)
            ]
            opt_profiles = create_optimization_profiles(builder, inputs)
            add_profiles(config, inputs, opt_profiles)

            engine = builder.build_engine(network, config)
            with open(
                    './{}/{}.fp{}.TEST.engine'.format(folder, model_name, fp),
                    "wb") as engine_file:
                engine_file.write(engine.serialize())
    return engine
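create_optimization_profiles and add_profiles are not defined in Example #13. A plausible sketch, under the assumption that each dynamic (-1) dimension is the batch axis and that a single shared profile is enough:

def create_optimization_profiles(builder, inputs, batch_sizes=(1, 1, 1)):
    # one shared profile; min/opt/max only differ in the dynamic batch axis
    profile = builder.create_optimization_profile()
    for inp in inputs:
        shape = list(inp.shape)
        min_shape = [batch_sizes[0] if d == -1 else d for d in shape]
        opt_shape = [batch_sizes[1] if d == -1 else d for d in shape]
        max_shape = [batch_sizes[2] if d == -1 else d for d in shape]
        profile.set_shape(inp.name, min_shape, opt_shape, max_shape)
    return [profile]

def add_profiles(config, inputs, opt_profiles):
    for profile in opt_profiles:
        config.add_optimization_profile(profile)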
Example #14
def build_engine(trt_file_path):
    # Deserialize a prebuilt TensorRT engine from disk. The original snippet
    # also created an unused builder/network/parser and read from a global
    # TRT_FILE_PATH instead of the trt_file_path argument.
    with open(trt_file_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        print('Beginning TRT file parsing')
        engine = runtime.deserialize_cuda_engine(f.read())
    print('Completed parsing of TRT file')

    context = engine.create_execution_context()
    print("Completed creating Engine")

    return engine, context
Example #15
    def initialize(self):
        """
        Parse input ONNX file to a TRT network. Apply layer optimizations and fusion plugins on network.
        """

        # Query system id for architecture
        self.system = get_system()
        self.gpu_arch = self.system.arch

        # Create network.
        self.network = self.builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        # Parse from onnx file.
        parser = trt.OnnxParser(self.network, self.logger)

        rn50_gs = RN50GraphSurgeon(self.model_path, self.gpu_arch,
                                   self.device_type, self.precision,
                                   self.cache_file, self.need_calibration)
        model = rn50_gs.process_onnx()
        success = parser.parse(onnx._serialize(model))
        if not success:
            raise RuntimeError(
                "ResNet50 onnx model processing failed! Error: {:}".format(
                    parser.get_error(0).desc()))
        # unmarking topk_layer_output_value, just leaving topk_layer_output_index
        assert self.network.num_outputs == 2, "Two outputs expected"
        assert self.network.get_output(0).name == "topk_layer_output_value",\
            "unexpected tensor: {}".format(self.network.get_output(0).name)
        assert self.network.get_output(1).name == "topk_layer_output_index",\
            "unexpected tensor: {}".format(self.network.get_output(1).name)
        logging.info("Unmarking output: {:}".format(
            self.network.get_output(0).name))
        self.network.unmark_output(self.network.get_output(0))

        # Set input dtype and format
        input_tensor = self.network.get_input(0)
        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
            input_tensor.dynamic_range = (-128, 127)
        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "chw4":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        self.initialized = True
Example #16
def build_engine(onnx_path):
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)
    builder.max_workspace_size = 1 << 20  # 1 MiB
    builder.max_batch_size = 1
    #    builder.fp16_mode = True
    print(onnx_path, file=sys.stderr)
    with open(onnx_path, 'rb') as model:
        parser.parse(model.read())
    out_size = 2895  # unused in this snippet
    isize = network.get_input(0).shape  # unused in this snippet
    last_layer = network.get_layer(network.num_layers - 1)
    network.mark_output(last_layer.get_output(0))
    print(network, file=sys.stderr)
    engine = builder.build_cuda_engine(network)
    return engine
Example #17
    def build_engine(max_batch_size, save_engine):
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(1) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:

            # parse onnx model file
            if not os.path.exists(onnx_file_path):
                quit('ONNX file {} not found'.format(onnx_file_path))
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
                assert network.num_layers > 0, 'Failed to parse ONNX model. \
                            Please check if the ONNX model is compatible '
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

            # build trt engine
            engine = None  # ensure defined even if no precision mode is selected
            if int8_mode:
                builder.max_batch_size = max_batch_size
                builder.int8_mode = int8_mode
                builder.max_workspace_size = 1 << 30  # 1GB
                assert calibration_stream, 'Error: a calibration_stream should be provided for int8 mode'
                builder.int8_calibrator = Calibrator(calibration_stream, calibration_table_path)
                engine = builder.build_cuda_engine(network)
                print('Int8 mode enabled')
            if fp16_mode:
                builder.max_batch_size = max_batch_size
                builder.max_workspace_size = 1 << 30  # 1GB
                builder.fp16_mode = fp16_mode
                engine = builder.build_cuda_engine(network)
                print('fp16 mode enabled')
            if fp32_mode:
                builder.max_batch_size = max_batch_size
                builder.max_workspace_size = 1 << 30  # 1GB
                engine = builder.build_cuda_engine(network)
                print('fp32 mode enabled')
            if engine is None:
                print('Failed to create the engine')
                return None
            print("Completed creating the engine")
            if save_engine:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine
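The Calibrator class used for INT8 mode in Example #17 is not shown. A typical entropy calibrator subclasses trt.IInt8EntropyCalibrator2; the sketch below assumes a hypothetical calibration_stream object that exposes batch_size and a next_batch() method returning contiguous numpy batches (None when exhausted):

import os
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

class Calibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, stream, cache_file):
        super().__init__()
        self.stream = stream          # hypothetical batch provider
        self.cache_file = cache_file
        self.d_input = None           # device buffer, allocated on first batch

    def get_batch_size(self):
        return self.stream.batch_size

    def get_batch(self, names):
        batch = self.stream.next_batch()  # contiguous numpy array, or None when done
        if batch is None:
            return None
        if self.d_input is None:
            self.d_input = cuda.mem_alloc(batch.nbytes)
        cuda.memcpy_htod(self.d_input, batch)
        return [int(self.d_input)]

    def read_calibration_cache(self):
        # reuse a previous calibration run if the cache file exists
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'rb') as f:
                return f.read()

    def write_calibration_cache(self, cache):
        with open(self.cache_file, 'wb') as f:
            f.write(cache)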
Example #18
def build_engine_onnx(model_file):
    # Create the builder, network, config and parser instances.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            common.EXPLICIT_BATCH) as network, builder.create_builder_config(
            ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
        # Set the builder config parameters.
        config.max_workspace_size = common.GiB(1)
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        # Build the engine.
        return builder.build_engine(network, config)
Example #19
def run(nProfile):
    logger = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 7 << 30

    parser = trt.OnnxParser(network, logger)
    with open(onnxFile, 'rb') as model:
        parser.parse(model.read())

    if nProfile == 1:
        profile = builder.create_optimization_profile()
        inputT0 = network.get_input(0)
        inputT0.shape = [-1, 1]
        profile.set_shape(inputT0.name, (1, 1), (510, 1), (512, 1))
        config.add_optimization_profile(profile)
    else:
        profile0 = builder.create_optimization_profile()
        inputT0 = network.get_input(0)
        inputT0.shape = [-1, 1]
        profile0.set_shape(inputT0.name, (1, 1), (4, 1), (4, 1))
        config.add_optimization_profile(profile0)

        profile1 = builder.create_optimization_profile()
        inputT0 = network.get_input(0)
        inputT0.shape = [-1, 1]
        profile1.set_shape(inputT0.name, (510, 1), (510, 1), (512, 1))
        config.add_optimization_profile(profile1)

    engineString = builder.build_serialized_network(network, config)
    planFile = onnxFile.split('.')[0] + "-%d.plan" % nProfile
    with open(planFile, 'wb') as f:
        f.write(engineString)

    print("Succeeded building %s!" % (planFile))

    engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)
    context = engine.create_execution_context()

    # At the time of writing, trtexec did not support multiple optimization profiles, so testing is done from a script instead.
    test(engine, context, 1)
    test(engine, context, 4)
    test(engine, context, 510)
    test(engine, context, 512)
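The test helper called at the end of Example #19 is not defined there. A sketch of what it presumably does with the two-profile engine built above; profile selection and the binding-index offset follow the TensorRT 7/8 binding layout, and buffer allocation/execution are elided:

def test(engine, context, length):
    n_profiles = engine.num_optimization_profiles
    bindings_per_profile = engine.num_bindings // n_profiles
    input_name = engine.get_binding_name(0)
    # pick the first profile whose [min, max] range covers the requested length
    for index in range(n_profiles):
        min_shape, opt_shape, max_shape = engine.get_profile_shape(index, input_name)
        if min_shape[0] <= length <= max_shape[0]:
            break
    context.active_optimization_profile = index
    # bindings are replicated per profile, so offset the input binding index
    context.set_binding_shape(index * bindings_per_profile, [length, 1])
    print("profile %d selected for input length %d" % (index, length))
    # ...allocate buffers for this profile's bindings and run execute_async_v2...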
Example #20
def convert_onnx_into_tensorrt_engine(onnx_model_file_path,
                                      trt_engine_output_file):
    """

    :param onnx_model_file_path:
    :param trt_engine_output_file:
    :return:
    """
    if ops.exists(trt_engine_output_file):
        print('Trt engine file: {:s} has been generated'.format(
            trt_engine_output_file))
        return
    try:
        with trt.Builder(TRT_LOGGER) as builder:
            explicit_batch = 1 << int(
                trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            with builder.create_network(explicit_batch) as network:
                with trt.OnnxParser(network, TRT_LOGGER) as parser:
                    # Parse the model to create a network.
                    with open(onnx_model_file_path, 'rb') as model:
                        parser.parse(model.read())
                        for error_index in range(parser.num_errors):
                            print(parser.get_error(error_index).desc())
                            print(parser.get_error(error_index).code())
                            print(parser.get_error(error_index).file())

                    # Configure the builder here.
                    builder.max_batch_size = 8
                    builder.max_workspace_size = 1 << 32

                    # Build and return the engine. Note that the builder,
                    # network and parser are destroyed when this function returns.
                    engine = builder.build_cuda_engine(network)
                    if engine is not None:
                        with open(trt_engine_output_file, "wb") as f:
                            f.write(engine.serialize())
                        print('Successfully constructed trt engine')
                        return engine
                    else:
                        print('Failed to construct trt engine')
                        return engine
    except Exception as err:
        print(err)
        print('Failed to construct trt engine')
        return None
Example #21
    def process(self, cast_output_file=None):
        try:
            import tensorrt as trt
        except:
            LOG.logE(
                "You must install the tensorrt package if you want to convert PyTorch models to TensorRT. 1. Download TensorRT 7.2.3 (for CUDA 11.0) from https://developer.nvidia.com/tensorrt \
            2. unpack TensorRT*.tar.gz  3. pip install tensorrt-x-cpx-none-linux_x86_64.whl in TensorRT*(your_tensorrt_path)/python",
                exit=True)
            return

        output_trt_file = self.config.model_dir
        if cast_output_file:
            output_trt_file = '{}/trt__{}.trt'.format(
                self.deepvac_core_config.output_dir, cast_output_file)
            self.config.model_dir = output_trt_file

        LOG.logI(
            "config.trt_model_dir found, save tensorrt model to {}...".format(
                self.config.model_dir))

        #to onnx, also set self.config.onnx_model_dir, self.config.onnx_input_names and self.config.onnx_output_names
        self.exportOnnx()

        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder, builder.create_network(
                1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        ) as network, trt.OnnxParser(network, trt_logger) as parser:
            builder.max_workspace_size = 4 << 30
            builder.max_batch_size = 1
            with open(self.config.onnx_model_dir, 'rb') as model:
                parser.parse(model.read())
            config = builder.create_builder_config()
            if self.config.enable_dynamic_input:
                profile = builder.create_optimization_profile()
                profile.set_shape(self.config.onnx_input_names[0],
                                  self.config.input_min_dims,
                                  self.config.input_opt_dims,
                                  self.config.input_max_dims)
                config.add_optimization_profile(profile)
            engine = builder.build_engine(network, config)
            with open(output_trt_file, "wb") as f:
                f.write(engine.serialize())
        LOG.logI(
            "Pytorch model convert to TensorRT model succeed, save model in {}"
            .format(output_trt_file))
Example #22
def build_engine(onnx_file_path, engine_file_path, mode='fp32', verbose=False):
    """Takes an ONNX file and creates a TensorRT engine."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            *EXPLICIT_BATCH) as network, trt.OnnxParser(network,
                                                        TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1

        # Switch mode here
        if mode == 'fp16':
            builder.fp16_mode = True
        elif mode == 'int8':
            builder.int8_mode = True
        #builder.strict_type_constraints = True

        # Parse model file
        if not os.path.exists(onnx_file_path):
            print(
                'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                .format(onnx_file_path))
            exit(0)
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        if int(trt.__version__.split('.')[0]) >= 7:
            # The actual yolov3.onnx is generated with batch size 64.
            # Reshape input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of ONNX file')

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        return engine
Example #23
def build_engine(onnx_path, shape=[1, 224, 224, 3], precision='FP32'):
    """
   This is the function to create the TensorRT engine
   Args:
      onnx_path : Path to onnx_file. 
      shape : Shape of the input of the ONNX file. 
  """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            1) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        if precision == 'FP16':
            builder.fp16_mode = True
            builder.strict_type_constraints = True
        builder.max_workspace_size = (256 << 20)
        with open(onnx_path, 'rb') as model:
            parser.parse(model.read())
        network.get_input(0).shape = shape
        engine = builder.build_cuda_engine(network)
        return engine
Example #24
 def build_engine():
     #EXPLICIT_BATCH = 1 << (int)(trt.BuilderFlag.FP16) | 1 << (int)(trt.BuilderFlag.STRICT_TYPES)
     EXPLICIT_BATCH = 1 << (int)(
         trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
     """Takes an ONNX file and creates a TensorRT engine to run inference with"""
     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
             EXPLICIT_BATCH) as network, trt.OnnxParser(
                 network, TRT_LOGGER) as parser:
         #with trt.Builder(TRT_LOGGER) as builder, builder.create_network(network_flags) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
         if (builder.platform_has_fast_fp16):
             print('support fp16')
         if (builder.platform_has_fast_int8):
             print('support int8')
         if (builder.fp16_mode):
             print('fp16 kernels are permitted')
         builder.fp16_mode = True
         #builder.int8_mode = True
         builder.strict_type_constraints = True
         builder.max_workspace_size = 1 << 29  # 512MB
         builder.max_batch_size = 1
         # Parse model file
         if not os.path.exists(onnx_file_path):
             print(
                 'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                 .format(onnx_file_path))
             exit(0)
         print('Loading ONNX file from path {}...'.format(onnx_file_path))
         with open(onnx_file_path, 'rb') as model:
             print('Beginning ONNX file parsing')
             if not parser.parse(model.read()):
                 print('ERROR: Failed to parse the ONNX file.')
                 for error in range(parser.num_errors):
                     print(parser.get_error(error))
                 return None
         # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
         network.get_input(0).shape = [1, 3, 416, 416]
         print('Completed parsing of ONNX file')
         print('Building an engine from file {}; this may take a while...'.
               format(onnx_file_path))
         engine = builder.build_cuda_engine(network)
         print("Completed creating Engine")
         with open(engine_file_path, "wb") as f:
             f.write(engine.serialize())
         return engine
Example #25
    def initialize(self):
        # Create network.
        self.network = self.builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        channel_idx = 1

        # Input shape
        input_tensor_dim = [-1] + self.input_volume_dim
        input_tensor_dim.insert(channel_idx, self.num_input_channel)

        # Parse from onnx file.
        parser = trt.OnnxParser(self.network, self.logger)
        model = self.preprocess_onnx(onnx.load(self.model_path))
        success = parser.parse(onnx._serialize(model))
        if not success:
            raise RuntimeError(
                "3D-Unet onnx model parsing failed! Error: {:}".format(
                    parser.get_error(0).desc()))

        # Set input/output tensor dtype and formats
        input_tensor = self.network.get_input(0)
        output_tensor = self.network.get_output(0)
        input_tensor.shape = input_tensor_dim

        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
        elif self.input_dtype == "fp16":
            input_tensor.dtype = trt.float16
        elif self.input_dtype == "fp32":
            input_tensor.dtype = trt.float32

        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "dhwc8":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.DHWC8)
        elif self.input_format == "cdhw32":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CDHW32)

        # Always use FP16 output
        output_tensor.dtype = trt.float16
        output_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)

        self.initialized = True
Example #26
def _build_engine_onnx(onnx_path: str,
                       force_fp16: bool = False,
                       max_batch_size: int = 1,
                       im_size: Tuple[int] = None):
    '''
    Builds TensorRT engine from provided ONNX file

    :param onnx_path: Path to ONNX file on disk
    :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful.
    :param max_batch_size: Define maximum batch size supported by engine. If >1 creates optimization profile.
    :param im_size: Required if max_batch_size > 1. Used for creation of optimization profile.
    :return: TensorRT engine
    '''

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:

        if builder.platform_has_fast_fp16 or force_fp16 is True:
            builder.fp16_mode = True
            builder.strict_type_constraints = True

        builder.max_workspace_size = 1 << 20

        if max_batch_size != 1 and im_size is not None:
            logging.warning(
                'Batch size !=1 is used. Ensure your inference code supports it.'
            )
            profile = builder.create_optimization_profile()
            profile.set_shape('data', (1, 3) + im_size,
                              (max_batch_size, 3) + im_size,
                              (max_batch_size, 3) + im_size)
            config.add_optimization_profile(profile)

        with open(onnx_path, "rb") as f:
            if not parser.parse(f.read()):
                print('ERROR: Failed to parse the ONNX file: {}'.format(
                    onnx_path))
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                sys.exit(1)

            return builder.build_engine(network, config=config)
Example #27
def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    try:
        check_requirements(('tensorrt',))
        import tensorrt as trt

        opset = (12, 13)[trt.__version__[0] == '8']  # test on TensorRT 7.x and 8.x
        export_onnx(model, im, file, opset, train, False, simplify)
        onnx = file.with_suffix('.onnx')
        assert onnx.exists(), f'failed to export ONNX file: {onnx}'

        LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
        f = file.with_suffix('.engine')  # TensorRT engine file
        logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            logger.min_severity = trt.Logger.Severity.VERBOSE

        builder = trt.Builder(logger)
        config = builder.create_builder_config()
        config.max_workspace_size = workspace * 1 << 30

        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        network = builder.create_network(flag)
        parser = trt.OnnxParser(network, logger)
        if not parser.parse_from_file(str(onnx)):
            raise RuntimeError(f'failed to load ONNX file: {onnx}')

        inputs = [network.get_input(i) for i in range(network.num_inputs)]
        outputs = [network.get_output(i) for i in range(network.num_outputs)]
        LOGGER.info(f'{prefix} Network Description:')
        for inp in inputs:
            LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
        for out in outputs:
            LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

        half &= builder.platform_has_fast_fp16
        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
        if half:
            config.set_flag(trt.BuilderFlag.FP16)
        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
            t.write(engine.serialize())
        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')

    except Exception as e:
        LOGGER.info(f'\n{prefix} export failure: {e}')
Example #28
def build_engine(model_path, shape):
    with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network(flags=network_flags) as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser:

        builder.max_batch_size = 1
        with open(model_path, "rb") as f:
            if not parser.parse(f.read()):
                print("ERROR: Failed to parse the ONNX file")
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30

        network.get_input(0).shape = shape
        engine = builder.build_engine(network, config)
        return engine
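network_flags is not defined in this snippet; presumably, as in the surrounding examples, it requests an explicit-batch network:

network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)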
Example #29
def build_engine(TRT_LOGGER, onnx_path, shape = [1,224,224,3]):
    """
    This is the function to create the TensorRT engine
    Args:
       onnx_path : Path to onnx_file. 
       shape : Shape of the input of the ONNX file. 
   """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(1) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = (256 << 20)
        with open(onnx_path, 'rb') as model:
            parser.parse(model.read())
        network.get_input(0).shape = shape
        # use FP16 mode if possible
        if builder.platform_has_fast_fp16:
            builder.fp16_mode = True

        # generate TensorRT engine optimized for the target platform
        engine = builder.build_cuda_engine(network)
        return engine
Example #30
def ONNX2TRT(args, calib=None):
    ''' convert onnx to tensorrt engine, use mode of ['fp32', 'fp16', 'int8']
    :return: trt engine
    '''

    assert args.mode.lower() in [
        'fp32', 'fp16', 'int8'
    ], "mode should be in ['fp32', 'fp16', 'int8']"

    G_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(G_LOGGER) as builder, builder.create_network() as network, \
            trt.OnnxParser(network, G_LOGGER) as parser:

        builder.max_batch_size = args.batch_size
        builder.max_workspace_size = 1 << 30
        if args.mode.lower() == 'int8':
            assert (builder.platform_has_fast_int8 == True), "not support int8"
            builder.int8_mode = True
            builder.int8_calibrator = calib
        elif args.mode.lower() == 'fp16':
            assert (builder.platform_has_fast_fp16 == True), "not support fp16"
            builder.fp16_mode = True

        print('Loading ONNX file from path {}...'.format(args.onnx_file_path))
        with open(args.onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            parser.parse(model.read())
        print('Completed parsing of ONNX file')

        print(
            'Building an engine from file {}; this may take a while...'.format(
                args.onnx_file_path))
        engine = builder.build_cuda_engine(network)
        print("Created engine success! ")

        # Save the serialized engine (plan file)
        print('Saving TRT engine file to path {}...'.format(
            args.engine_file_path))
        with open(args.engine_file_path, "wb") as f:
            f.write(engine.serialize())
        print('Engine file has been saved to {}!'.format(
            args.engine_file_path))
        return engine
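Many of the examples above serialize the finished engine to disk. A minimal sketch of loading such a plan file back for inference (the file name is illustrative):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def load_engine(engine_file_path):
    with open(engine_file_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

engine = load_engine('model.engine')
context = engine.create_execution_context()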