Example #1
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
        ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                # parser.parse returns a bool; the original sample did not check it.
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
                print(
                    network.get_layer(network.num_layers -
                                      1).get_output(0).shape)
            network.mark_output(
                network.get_layer(network.num_layers - 1).get_output(0))

            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path,
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
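
A typical call site for a get_engine helper like this pairs it with the buffer-allocation and inference utilities shipped alongside NVIDIA's TensorRT samples. A minimal usage sketch, assuming the common.allocate_buffers and common.do_inference helpers from those samples and a preprocessed numpy array named image:

# Hypothetical usage sketch; 'common' and 'image' are assumptions.
engine = get_engine('yolov3.onnx', 'yolov3.trt')
inputs, outputs, bindings, stream = common.allocate_buffers(engine)
with engine.create_execution_context() as context:
    np.copyto(inputs[0].host, image.ravel())
    trt_outputs = common.do_inference(context, bindings=bindings,
                                      inputs=inputs, outputs=outputs,
                                      stream=stream)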
Example #2
def get_engine(onnx_file_path, engine_file_path, input_size, rebuild=True):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    print("Explicit batch: ", common.EXPLICIT_BATCH)
    max_batch_size = 1

    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                common.EXPLICIT_BATCH) as network, trt.OnnxParser(
                    network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1 GiB
            builder.max_batch_size = max_batch_size
            #builder.int8_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # The original yolov3.onnx is generated with batch size 64; reshape the input to batch size 1.
            network.get_input(0).shape = [max_batch_size] + input_size
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path) and not rebuild:
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path,
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #3
    def __init__(self, engine_file_path, num_class):
        # Create a Context on this device,
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)

        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            size = trt.volume(
                engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(cuda_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.num_class = num_class
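
The buffers stored by this constructor are meant to be reused on every call, so a companion inference method is the usual counterpart. A sketch of what that might look like, assuming a single input and output binding; the method name and shapes are assumptions, and the cfx push/pop keeps the CUDA context correct when called from another thread:

    def infer(self, input_array):
        # Hypothetical companion method using the buffers stored above.
        self.cfx.push()
        np.copyto(self.host_inputs[0], input_array.ravel())
        cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0],
                               self.stream)
        # Implicit-batch engine, so execute_async (not v2) is the match.
        self.context.execute_async(batch_size=1, bindings=self.bindings,
                                   stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(self.host_outputs[0], self.cuda_outputs[0],
                               self.stream)
        self.stream.synchronize()
        self.cfx.pop()
        return self.host_outputs[0]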
Example #4
def build_engine(weights, engine_file_path):
    if os.path.exists(engine_file_path):
        with open(engine_file_path,
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(
            TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = 2 << 30  # 2 GiB
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        engine = builder.build_cuda_engine(network)
        assert engine is not None
        print("Completed creating Engine")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
        return engine
Example #5
    def _load_from_state_dict(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        engine_bytes = state_dict[prefix + "engine"]

        with trt.Logger() as logger, trt.Runtime(logger) as runtime:
            self.engine = runtime.deserialize_cuda_engine(engine_bytes)
            self.context = self.engine.create_execution_context()

        self.input_names = state_dict[prefix + "input_names"]
        self.output_names = state_dict[prefix + "output_names"]
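
For this hook to find anything, the serialized engine must have been written into the state dict in the first place. A plausible counterpart, modeled on torch2trt's TRTModule (treat the body as a sketch):

    def _save_to_state_dict(self, state_dict, prefix, local_metadata):
        # Store the serialized engine and I/O names under the same keys
        # that _load_from_state_dict reads back.
        state_dict[prefix + "engine"] = bytearray(self.engine.serialize())
        state_dict[prefix + "input_names"] = self.input_names
        state_dict[prefix + "output_names"] = self.output_names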
Example #6
    def create_engine(self):
        print("creating engine")
        if not os.path.exists(self.uff_file_name):
            print(self.uff_file_name, "not found. create new uff file")
            self.convert_to_uff()

        if not os.path.exists(self.engine_file_name):
            print(self.engine_file_name, "not found. build new engine")
            engine = self.build_engine()
            with open(self.engine_file_name, "wb") as f:
                f.write(engine.serialize())
        else:
            print(self.engine_file_name, "found. reuse engine")
            with open(self.engine_file_name,
                      "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
                engine = runtime.deserialize_cuda_engine(f.read())
        self.engine = engine
        print("engine created")
Example #7
def get_engine(model_path: str):
    """
        Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.
    """
    if os.path.exists(model_path):
        if model_path.endswith('trt'):
            print(f"Reading engine from file {model_path}")
            with open(model_path,
                      'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
                return runtime.deserialize_cuda_engine(f.read())

        elif model_path.endswith('onnx'):
            return build_engine(model_path)

        else:
            print("Invalid File: Only .onnx and .trt are supported.")
    else:
        print(f"FILE: {model_path} not found.")
Example #8
    def _serialize_deserialize(self, trt_engine):
        if USE_PYBIND:
            self.runtime = trt.Runtime(TRT_LOGGER)
        else:
            self.runtime = trt.infer.create_infer_runtime(self._logger)
            self.plugin_factory = parser_runtime.create_plugin_factory(
                self._logger)

        serialized_engine = trt_engine.serialize()
        del self.parser  # Parser no longer needed for ownership of plugins

        if USE_PYBIND:
            trt_engine = self.runtime.deserialize_cuda_engine(
                serialized_engine)
        else:
            trt_engine = self.runtime.deserialize_cuda_engine(
                serialized_engine, self.plugin_factory)
        return trt_engine
Example #9
    def __init__(self,
                 trt_model_path,
                 device,
                 img_mean=np.array([128, 128, 128], dtype=np.float32),
                 img_scale=np.float32(1 / 255)):
        assert device == 'GPU', 'Only supports GPU.'
        TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
        # runtime = trt.Runtime(TRT_LOGGER)
        with open(trt_model_path,
                  'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            self.engine = runtime.deserialize_cuda_engine(f.read())

        self.img_mean = img_mean
        self.img_scale = img_scale
        # self.inputs, self.outputs, self.bindings, self.stream = self._allocate_buffers(self.engine)
        self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(
            self.engine)
        self.context = self.engine.create_execution_context()
Example #10
 def __init__(self,
              build_engine_proc=None,
              build_engine_params=None,
              engine_file_path=None):
     tensorrt.init_libnvinfer_plugins(None, "")
     logger = tensorrt.Logger(tensorrt.Logger.INFO)
     if engine_file_path is None:
         with tensorrt.Builder(logger) as builder:
             if build_engine_params is not None:
                 self.engine = build_engine_proc(builder,
                                                 *build_engine_params)
             else:
                 self.engine = build_engine_proc(builder)
     else:
         with open(engine_file_path,
                   'rb') as f, tensorrt.Runtime(logger) as runtime:
             self.engine = runtime.deserialize_cuda_engine(f.read())
     self.context = self.engine.create_execution_context()
Example #11
 def __init__(self, model):
     print('setting up Yolov5s-simple.trt processor')
     # load tensorrt engine
     TRT_LOGGER = trt.Logger(trt.Logger.INFO)
     TRTbin = '{0}/models/{1}'.format(os.path.dirname(__file__), model)
     with open(TRTbin, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
         engine = runtime.deserialize_cuda_engine(f.read())
     self.context = engine.create_execution_context()
     # allocate memory
     inputs, outputs, bindings = [], [], []
     stream = cuda.Stream()
     for binding in engine:
         size = trt.volume(engine.get_binding_shape(binding))
         dtype = trt.nptype(engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         bindings.append(int(device_mem))
         if engine.binding_is_input(binding):
             inputs.append({'host': host_mem, 'device': device_mem})
         else:
             outputs.append({'host': host_mem, 'device': device_mem})
     # save to class
     self.inputs = inputs
     self.outputs = outputs
     self.bindings = bindings
     self.stream = stream
     # post processing config
     filters = (80 + 5) * 3
     self.output_shapes = [(1, 3, 80, 80, 85), (1, 3, 40, 40, 85),
                           (1, 3, 20, 20, 85)]
     self.strides = np.array([8., 16., 32.])
     anchors = np.array([
         [[10, 13], [16, 30], [33, 23]],
         [[30, 61], [62, 45], [59, 119]],
         [[116, 90], [156, 198], [373, 326]],
     ])
     self.nl = len(anchors)
     self.nc = 80  # classes
     self.no = self.nc + 5  # outputs per anchor
     self.na = len(anchors[0])
     a = anchors.copy().astype(np.float32)
     a = a.reshape(self.nl, -1, 2)
     self.anchors = a.copy()
     self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)
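
The strides, anchors, and anchor_grid stored above feed the standard YOLOv5 decode. A sketch of that decode for a single output scale i, assuming the raw output x has already been reshaped to (batch, na, ny, nx, no) and grid holds the cell-coordinate meshgrid for that scale:

    def decode_scale(self, x, i, grid):
        # Hypothetical helper mirroring the standard YOLOv5 head math.
        y = 1.0 / (1.0 + np.exp(-x))  # sigmoid over all outputs
        y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid) * self.strides[i]
        y[..., 2:4] = (y[..., 2:4] * 2.0) ** 2 * self.anchor_grid[i]
        return y.reshape(y.shape[0], -1, self.no)  # (batch, boxes, no)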
Example #12
    def test_calibrator_outside_polygraphy(self, identity_builder_network):
        builder, network = identity_builder_network
        NUM_BATCHES = 2

        config = builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.INT8)
        with Calibrator(generate_data(NUM_BATCHES)) as calibrator:
            config.int8_calibrator = calibrator

            if mod.version(trt.__version__) < mod.version("8.0"):
                engine = builder.build_engine(network, config)
            else:
                with trt.Runtime(get_trt_logger()) as runtime:
                    engine = runtime.deserialize_cuda_engine(
                        builder.build_serialized_network(network, config))

            with engine:
                assert engine
        self.check_calibrator_cleanup(calibrator)
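
Polygraphy's Calibrator consumes a generator of feed dictionaries, one per calibration batch. A plausible shape for the generate_data helper used above, assuming the identity test network has a single input named "x":

def generate_data(num_batches):
    # Yield one feed_dict per calibration batch, mapping input names to arrays.
    for _ in range(num_batches):
        yield {"x": np.ones((1, 1, 2, 2), dtype=np.float32)}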
Example #13
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                common.EXPLICIT_BATCH
        ) as network, builder.create_builder_config(
        ) as config, trt.OnnxParser(network, TRT_LOGGER) as parser:
            config.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
            network.get_input(0).shape = [1, 3, 608, 608]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_engine(network, config)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path,
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #14
    def _load_from_state_dict(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        engine_bytes = state_dict[prefix + "engine"]

        logger = trt.Logger()
        runtime = trt.Runtime(logger)
        self.engine = runtime.deserialize_cuda_engine(engine_bytes)

        self.input_names = state_dict[prefix + "input_names"]
        self.output_names = state_dict[prefix + "output_names"]
        self._initialize()
Example #15
    def __init__(self, config):
        self.detection_threshold = config["detection_threshold"]
        self.nms_threshold = config["nms_threshold"]
        self.engine_path = config["engine_file"]
        self.class_names = load_class_names(config["names_file"])

        self.logger = trt.Logger()
        self.runtime = trt.Runtime(self.logger)

        print("Reading engine from file {}".format(self.engine_path))
        with open(self.engine_path, "rb") as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())

        self.context = self.engine.create_execution_context()
        self.buffers = self._allocate_buffers(self.engine, 1)

        self.input_h = 608  # Set in yolov4-facemask.cfg
        self.input_w = 608
        self.context.set_binding_shape(0, (1, 3, self.input_h, self.input_w))
Example #16
 def _load_engine(self, model_path):
     if not model_path:
         logging.info(
             "No model file was specified; using the COCO pretrained model.")
         base_url = "https://github.com/Tony607/jetson_nano_trt_tf_ssd/raw/master/packages/jetpack4.3/"
         base_dir = "detectors/data/"
         model_file = "TRT_ssd_mobilenet_v2_coco.bin"
         model_path = os.path.join(base_dir, model_file)
         if not os.path.isfile(model_path):
             logging.info(
                 'model does not exist under: {}, downloading from {}'.
                 format(str(model_path), base_url + model_file))
             os.makedirs(base_dir, exist_ok=True)
             wget.download(base_url + model_file, model_path)
     """ Load engine file as a trt Runtime. """
     with open(model_path,
               'rb') as f, trt.Runtime(self.trt_logger) as runtime:
         return runtime.deserialize_cuda_engine(f.read())
Example #17
def build_engine(onnx_path, using_half):
    engine_file = onnx_path.replace(".onnx", ".engine")
    if os.path.exists(engine_file):
        with open(engine_file, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_batch_size = 1 # always 1 for explicit batch
        config = builder.create_builder_config()
        config.max_workspace_size = GiB(1)
        if using_half:
            config.set_flag(trt.BuilderFlag.FP16)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(onnx_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        return builder.build_engine(network, config)
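
Note that this variant only ever reads a cached engine; a freshly built one is returned without being written back to engine_file, so the cache never fills. A minimal caching fix, sketched as a replacement for the final return:

        # Sketch: persist the freshly built engine so the cache-hit path
        # at the top of the function can find it on the next run.
        engine = builder.build_engine(network, config)
        if engine is not None:
            with open(engine_file, "wb") as f:
                f.write(engine.serialize())
        return engine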
Example #18
def get_engine(onnx_file_path,
               engine_file_path="",
               fp16_mode=False,
               overwrite=False):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
        ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256 MiB
            builder.max_batch_size = 1
            if fp16_mode:
                print('Using FP16 mode')
                builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print("Completed creating Engine")
            print('Saving TensorRT engine to path {}...'.format(engine_file_path))
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path) and not overwrite:
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path,
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
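
builder.max_workspace_size, builder.fp16_mode, and builder.build_cuda_engine were removed in TensorRT 8; on newer versions the equivalent switches live on a builder config. A sketch of the replacement inside build_engine:

            # TensorRT 8+ equivalent of the deprecated builder attributes.
            config = builder.create_builder_config()
            config.max_workspace_size = 1 << 28
            if fp16_mode:
                config.set_flag(trt.BuilderFlag.FP16)
            engine = builder.build_engine(network, config)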
Example #19
 def __init__(self, model, anchor_nums, nc, anchors, output_shapes,
              img_size):
     # load tensorrt engine
     self.cfx = cuda.Device(0).make_context()
     TRT_LOGGER = trt.Logger(trt.Logger.INFO)
     TRTbin = model
     # print('trtbin', TRTbin)
     runtime = trt.Runtime(TRT_LOGGER)
     with open(TRTbin, 'rb') as f:
         engine = runtime.deserialize_cuda_engine(f.read())
     self.context = engine.create_execution_context()
     # allocate memory
     inputs, outputs, bindings = [], [], []
     stream = cuda.Stream()
     for binding in engine:
         size = trt.volume(
             engine.get_binding_shape(binding)) * engine.max_batch_size
         dtype = trt.nptype(engine.get_binding_dtype(binding))
         host_mem = cuda.pagelocked_empty(size, dtype)
         device_mem = cuda.mem_alloc(host_mem.nbytes)
         bindings.append(int(device_mem))
         if engine.binding_is_input(binding):
             inputs.append({'host': host_mem, 'device': device_mem})
         else:
             outputs.append({'host': host_mem, 'device': device_mem})
     # save to class
     self.inputs = inputs
     self.outputs = outputs
     self.bindings = bindings
     self.stream = stream
     self.anchor_nums = anchor_nums
     self.nc = nc  # classes
     self.no = self.nc + 5  # outputs per anchor
     # post processing config
     self.output_shapes = output_shapes
     self.strides = np.array([8., 16., 32.])
     self.na = len(anchors[0])
     self.nl = len(anchors)
     self.img_size = img_size
     a = anchors.copy().astype(np.float32)
     a = a.reshape(self.nl, -1, 2)
     self.anchors = a.copy()
     self.anchor_grid = a.copy().reshape(self.nl, 1, -1, 1, 1, 2)
Example #20
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        builder = trt.Builder(TRT_LOGGER)
        network = builder.create_network(common.EXPLICIT_BATCH)
        parser = trt.OnnxParser(network, TRT_LOGGER)
        runtime = trt.Runtime(TRT_LOGGER)

        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        print('Completed parsing of ONNX file')

        # Print input info
        print('Network inputs:')
        for i in range(network.num_inputs):
            tensor = network.get_input(i)
            print(tensor.name, trt.nptype(tensor.dtype), tensor.shape)

        network.get_input(0).shape = [10, 1]
        network.get_input(1).shape = [10, 1, 1, 16]
        network.get_input(2).shape = [6, 1]
        network.get_input(3).shape = [6, 1, 1, 16]

        config = builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.REFIT)
        config.max_workspace_size = 1 << 28  # 256MiB

        print(
            'Building an engine from file {}; this may take a while...'.format(
                onnx_file_path))
        plan = builder.build_serialized_network(network, config)
        if plan is None:
            print('ERROR: Failed to build the serialized network.')
            return None
        engine = runtime.deserialize_cuda_engine(plan)
        print("Completed creating Engine")

        with open(engine_file_path, "wb") as f:
            f.write(plan)
        return engine
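
This example sets trt.BuilderFlag.REFIT, which lets weights in the finished engine be swapped without a rebuild. A hypothetical refit, assuming new_kernel is a numpy array matching a layer named "fc1" in the parsed network:

# Hypothetical refit sketch; the layer name and weights are assumptions.
refitter = trt.Refitter(engine, TRT_LOGGER)
refitter.set_weights("fc1", trt.WeightsRole.KERNEL, new_kernel)
assert refitter.refit_cuda_engine()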
Example #21
def get_engine(onnx_file_path, engine_file_path=""):
    """
    Attempts to load a serialized engine if available,
    otherwise builds a new TensorRT engine and saves it.
    """
    print("hello world")

    def build_engine():
        """
        Takes an ONNX file and creates a TensorRT engine to run inference with
        """
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
        ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 2 << 30  # 2 GiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    'ONNX file {} not found, please run platedetection2onnx.py first to generate it.'
                    .format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.
                  format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print('Completed creating Engine')
            with open(engine_file_path, 'wb') as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print('Reading engine from file {}'.format(engine_file_path))
        with open(engine_file_path,
                  'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #22
def main():

    # 13 repetitions of the sequence 1..9 (117 elements in total).
    arr = np.tile(np.arange(1.0, 10.0), 13)
    print(arr)


    # with build_engine() as engine:
    #     # Build an engine, allocate buffers and create a stream.
    #     # For more information on buffer allocation, refer to the introductory samples.
    #     h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    #     np.copyto(h_input, arr)
    #     # print("debug")
    #     with engine.create_execution_context() as context:
    #         do_inference(context, h_input, d_input, h_output, d_output, stream)
    #         print(h_output)

    # save_engine = os.path.join(os.path.dirname(__file__), "sample.engine")
    # with build_engine() as engine:
    #     with open(save_engine, "wb") as f:
    #         f.write(engine.serialize())

    save_engine = os.path.join(os.path.dirname(__file__), "sample.engine")
    with open(save_engine, "rb") as f,  trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        np.copyto(h_input, arr)
        # print("debug")
        with engine.create_execution_context() as context:
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            print(h_output)
Example #23
def main():
    common.add_help(description="Yeah!")
    # Get the PyTorch weights
    weights = torch.load('mobilenetv3_centernet162_910.pth',
                         map_location={'cuda:0': 'cpu'})
    mobilenetv3 = get_pose_net({'hm': 2, 'wh': 2, 'reg': 2})
    mobilenetv3.load_state_dict(weights, strict=False)
    mobilenetv3.eval()
    # Do inference with TensorRT.
    with MobileNetv3(weights).engine as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        with open('mobilenetv3-centernet.trt', "wb") as f:
            f.write(engine.serialize())

        with open('mobilenetv3.trt',
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            with engine.create_execution_context() as context:
                t = 0
                for _ in range(1):
                    img = load_random_test_case(
                        pagelocked_buffer=inputs[0].host)
                    # For more information on performing inference, refer to the introductory samples.
                    # The common.do_inference function will return a list of outputs - we only have one in this case.
                    a = time.time()
                    [hm, wh, reg, _] = common.do_inference(context,
                                                           bindings=bindings,
                                                           inputs=inputs,
                                                           outputs=outputs,
                                                           stream=stream,
                                                           batch_size=1)
                    t += time.time() - a

        with torch.no_grad():
            [baseline] = mobilenetv3.cuda()(torch.from_numpy(img).cuda())
            print('baseline: ', baseline['hm'].mean().cpu().numpy(),
                  baseline['wh'].mean().cpu().numpy(),
                  baseline['reg'].mean().cpu().numpy())
        print('output:   ', np.mean(hm), np.mean(wh), np.mean(reg))
    print('Time: ', t)
Example #24
def get_engine(onnx_file_path, width=608, height=608, batch_size=1, engine_file_path="", int8mode=False,
               calib_file='yolo_calibration.cache'):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = batch_size
            if int8mode:
                # calibrator definition
                calibration_dataset_loc = "calibration_dataset/"
                calibration_cache = calib_file
                calib = calibrator.PythonEntropyCalibrator(calibration_dataset_loc, cache_file=calibration_cache,
                                                           width=width, height=height, batch_size=batch_size)
                builder.int8_mode = True
                builder.int8_calibrator = calib
            else:
                builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                print('ERROR: Failed to build the engine.')
                return None
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #25
    def __init__(self, trt_path):
        # get model name
        self._model_name = os.path.basename(trt_path)
        self._model_name = self._model_name[:self._model_name.rfind(".")]

        # create engine
        self.trt_path = trt_path
        self.logger = trt.Logger()
        self.runtime = trt.Runtime(self.logger)
        with open(trt_path, "rb") as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())

        # create context and buffer
        self.context = self.engine.create_execution_context()
        self.stream = cuda.Stream()
        bindings = []
        host_input = device_input = host_output = device_output = None

        for binding in self.engine:
            binding_idx = self.engine.get_binding_index(binding)
            print(f"binding name {binding}, idx {binding_idx}")
            size = trt.volume(self.context.get_binding_shape(binding_idx))
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            if self.engine.binding_is_input(binding):
                # Page-locked memory is required for async host/device copies.
                host_input = cuda.pagelocked_empty(size, dtype)
                device_input = cuda.mem_alloc(host_input.nbytes)
                bindings.append(int(device_input))
            else:
                host_output = cuda.pagelocked_empty(size, dtype)
                device_output = cuda.mem_alloc(host_output.nbytes)
                bindings.append(int(device_output))

        assert device_input is not None
        assert device_output is not None
        assert len(bindings) == 2

        self.bindings = bindings
        self.device_input = device_input
        self.host_input = host_input
        self.device_output = device_output
        self.host_output = host_output
Example #26
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""

    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network,
                                                                                                     TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            # builder.fp16_mode = True
            # builder.strict_type_constraints = True

            # Parse model file
            if not os.path.exists(onnx_file_path):
                raise FileNotFoundError(
                    'ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))

            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)

            if engine is None:
                raise RuntimeError('Failed to build the engine.')

            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
                print("Completed creating Engine")
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #27
def main():
    # data_test_path = "/home/ubuntu/MyFiles/ZTE/FACE-ALL-5-POINTS_CROP/"
    data_test_path = "/home/ubuntu/MyFiles/ZTE/1000pairs/"
    data_txt = "/home/ubuntu/MyFiles/ZTE/test_2000_images_list.txt"

    with open("zte.v7.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        batch_size = BATCH_SIZE

        # Allocate buffers and create a CUDA stream.
        features1 = []
        features2 = []
        with engine.create_execution_context() as context:

            data_test1, data_test2 = get_test_data(data_test_path, data_txt)
            for i in range(TEST_NUM):
                h_input, d_input, h_output, d_output, stream = allocate_buffers(
                    engine)
                test_case = load_normalized_test_case(data_test1[i], h_input)
                do_inference(context, h_input, d_input, h_output, d_output,
                             stream)
                features1.append(h_output)
#             print(data_test1[0:10])

            for i in range(TEST_NUM):
                h_input, d_input, h_output, d_output, stream = allocate_buffers(
                    engine)
                test_case = load_normalized_test_case(data_test2[i], h_input)
                do_inference(context, h_input, d_input, h_output, d_output,
                             stream)
                features2.append(h_output)


        GetTPR(features1, features2)
Example #28
    def __init__(self, model_path, device):
        cuda.init()
        self.__device = cuda.Device(device)
        self.context = self.__device.make_context()

        trt.init_libnvinfer_plugins(engine.TRT_LOGGER, '')

        self.__trt_runtime = trt.Runtime(engine.TRT_LOGGER)
        try:
            self.__trt_engine = engine.load_engine(
                self.__trt_runtime, os.path.join(model_path, 'gpu.buf'))
        except Exception as e:
            self.__finalize()
            raise e

        self._allocate_buffers()

        self.__model_shape = itemgetter(1, 2)(
            self.__trt_engine.get_binding_shape('Input'))
        self.__execution_context = self.__trt_engine.create_execution_context()
Example #29
 def _engine_init(self):
     """
     Load a serialized engine buffer or build a new one.
     :return: a TensorRT engine object
     """
     self.trt_runtime = trt.Runtime(TRT_LOGGER)
     self.trt_engine = None
     engine_file = os.path.splitext(self.model_dir)[0] + '.engine'
     if not os.path.exists(engine_file) or self.force_rebuild:
         print('no built engine found, building a new one...')
         model_type = os.path.splitext(self.model_dir)[-1]
         valid_model_format = ['.pb', '.onnx']
         assert model_type in valid_model_format, 'provided model is invalid:{}/{}'.format(
             model_type, valid_model_format)
         self.trt_engine = TensorrtBuilder.build_engine_from_pb_or_onnx(
             self.model_dir, **self.kwargs)
     else:
         print('loading built engine:{}...'.format(engine_file))
         self.trt_engine = TensorrtBuilder._load_engine(
             self.trt_runtime, engine_file)
Example #30
    def __init__(self, tensorrt_engine_path: str):
        assert (os.path.exists(tensorrt_engine_path))
        with open(tensorrt_engine_path, "rb") as fp:
            self.runtime = trt.Runtime(trt.Logger())
            self.engine = self.runtime.deserialize_cuda_engine(fp.read())
        # numpy dtypes of the input and output arrays
        self.array_in_dtype = trt.nptype(self.engine.get_binding_dtype(0))
        self.array_out_dtype = trt.nptype(self.engine.get_binding_dtype(1))
        # numpy shapes of the input and output arrays (no batch dimension)
        self.array_in_shape = self.engine.get_binding_shape(0)
        self.array_out_shape = self.engine.get_binding_shape(1)

        self.stream = cuda.Stream()
        self.h_input = cuda.pagelocked_empty(trt.volume(self.array_in_shape),
                                             dtype=self.array_in_dtype)
        self.h_output = cuda.pagelocked_empty(trt.volume(self.array_out_shape),
                                              dtype=self.array_out_dtype)
        # Allocate device memory for inputs and outputs.
        self.d_input = cuda.mem_alloc(self.h_input.nbytes)
        self.d_output = cuda.mem_alloc(self.h_output.nbytes)
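
With the page-locked host buffers and device allocations above, inference reduces to three asynchronous operations on the stream. A sketch of a companion method, assuming an explicit-batch engine so execute_async_v2 applies; note that the constructor above stops short of creating an execution context, so one is created here:

    def infer(self, array_in):
        # Hypothetical companion method built on the buffers allocated above.
        context = self.engine.create_execution_context()
        np.copyto(self.h_input, array_in.ravel())
        cuda.memcpy_htod_async(self.d_input, self.h_input, self.stream)
        context.execute_async_v2(
            bindings=[int(self.d_input), int(self.d_output)],
            stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream)
        self.stream.synchronize()
        return self.h_output.reshape(tuple(self.array_out_shape))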