def onnx_to_int8(anno, prefix):
    apex = onnxparser.create_onnxconfig()

    apex.set_model_file_name("model.onnx")
    apex.set_model_dtype(trt.infer.DataType.FLOAT)
    apex.set_print_layer_info(False)
    trt_parser = onnxparser.create_onnxparser(apex)
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)

    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()

    # stream 5 calibration batches from the annotation list under prefix
    batchstream = calibrator.ImageBatchStream(5, anno, prefix)
    int8_calibrator = calibrator.PythonEntropyCalibrator(["data"], batchstream)

    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    builder = trt.infer.create_infer_builder(G_LOGGER)
    builder.set_max_batch_size(16)
    builder.set_max_workspace_size(1 << 20)  # 1 MiB of scratch space
    builder.set_int8_calibrator(int8_calibrator)
    builder.set_int8_mode(True)
    engine = builder.build_cuda_engine(trt_network)
    # serialize the calibrated INT8 engine to disk
    modelstream = engine.serialize()
    trt.utils.write_engine_to_file("engine.bin", modelstream)
    engine.destroy()
    builder.destroy()
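
To reuse the serialized engine later without re-parsing the model, a minimal sketch, assuming the legacy trt.utils.load_engine helper and the file written above:

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
engine = trt.utils.load_engine(G_LOGGER, "engine.bin")  # reload the serialized INT8 engine
context = engine.create_execution_context()
# ... run inference through the context, then clean up
context.destroy()
engine.destroy()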
Example 2
def onnx_infer(anno, prefix):
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name("model.onnx")
    apex.set_model_dtype(trt.infer.DataType.FLOAT)
    apex.set_print_layer_info(False)
    trt_parser = onnxparser.create_onnxparser(apex)
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    builder = trt.infer.create_infer_builder(G_LOGGER)
    builder.set_max_batch_size(16)
    engine = builder.build_cuda_engine(trt_network)
    context = engine.create_execution_context()
    with open(anno) as f:
        lines = f.readlines()
    total = 0
    correct = 0
    for line in lines:
        path, gt = line.strip().split(' ')
        gt = int(gt)
        img = read_image_chw(os.path.join(prefix, path))
        # run inference for a single image; the network has 10 output classes
        output = infer(context, img, 10, 1)
        conf, pred = Tensor(output).topk(1, dim=0)  # top-1 class and its score
        pred = int(pred.data[0])
        if pred == gt:
            correct += 1
        total += 1
    print(correct / total)  # overall top-1 accuracy
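
The read_image_chw helper is not shown on this page; a minimal sketch of what it might look like, assuming PIL/numpy and a CHW float32 input layout (the 28x28 size is a guess):

import numpy as np
from PIL import Image

def read_image_chw(path, size=(28, 28)):
    # load, resize, and scale pixel values to [0, 1]
    img = Image.open(path).resize(size)
    arr = np.asarray(img, dtype=np.float32) / 255.0
    if arr.ndim == 2:
        arr = arr[np.newaxis, :, :]   # grayscale: add a channel axis
    else:
        arr = arr.transpose(2, 0, 1)  # color: HWC -> CHW
    return np.ascontiguousarray(arr)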
Example 3
def main():
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # Create onnx_config
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(MODEL)
    apex.set_model_dtype(trt.infer.DataType_kFLOAT)

    # Create onnx parser
    parser = onnxparser.create_onnxparser(apex)
    assert parser
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    parser.parse(onnx_filename, data_type)
    parser.report_parsing_info()
    parser.convert_to_trtnetwork()

    # retrieve network interface from the parser
    network = parser.get_trtnetwork()
    assert network

    # create infer builder
    builder = trt.infer.create_infer_builder(G_LOGGER)
    if apex.get_model_dtype() == trt.infer.DataType_kHALF:
        builder.set_fp16_mode(True)
    elif apex.get_model_dtype() == trt.infer.DataType_kINT8:
        print("Int8 Model not supported")
        sys.exit()

    # create engine
    engine = builder.build_cuda_engine(network)
    assert engine

    rand_file = randint(0, 9)
    img = get_testcase(DATA + str(rand_file) + '.pgm')
    data = normalize(img)
    print("Test case: " + str(rand_file))
    if data.size == 0:
        msg = "The input tensor is of zero size - please check your path to the input or the file type"
        G_LOGGER.log(trt.infer.Logger.Severity_kERROR, msg)

    out = infer(engine, data, 1)
    print("Prediction: " + str(np.argmax(out)))

    # clean up
    engine.destroy()
    network.destroy()
    parser.destroy()
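
MODEL, DATA, get_testcase, and normalize are defined elsewhere in this sample; a minimal sketch under the assumption that the inputs are MNIST-style PGM digits (names and paths are placeholders):

import numpy as np
from PIL import Image

MODEL = "mnist.onnx"   # assumed model path
DATA = "data/mnist/"   # assumed data directory

def get_testcase(path):
    # read a PGM digit as a 2-D uint8 array
    return np.asarray(Image.open(path), dtype=np.uint8)

def normalize(img):
    # scale pixels to [0, 1] and flatten to the engine's input size
    return (img.astype(np.float32) / 255.0).ravel()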
Example 4
def onnx_2_float32():
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(args.onnx_model_name)
    apex.set_model_dtype(trt.infer.DataType.FLOAT)
    apex.set_print_layer_info(False)
    trt_parser = onnxparser.create_onnxparser(apex)

    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()

    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    builder = trt.infer.create_infer_builder(G_LOGGER)
    builder.set_max_batch_size(16)
    engine = builder.build_cuda_engine(trt_network)
    modelstream = engine.serialize()
    trt.utils.write_engine_to_file(args.trt32_model_name, modelstream)
    engine.destroy()
    builder.destroy()
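
onnx_2_float32 reads args at module scope; a sketch of the kind of argparse setup it assumes (flag names are inferred from the attribute names):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--onnx_model_name", default="model.onnx",
                    help="path to the input ONNX model")
parser.add_argument("--trt32_model_name", default="model_fp32.engine",
                    help="output path for the serialized FP32 engine")
args = parser.parse_args()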
Example 5
def run_onnx(onnx_file, data_type, bs, inp):
    # Create onnx_config
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(onnx_file)
    apex.set_model_dtype(convert_to_datatype(data_type))

    # create parser
    trt_parser = onnxparser.create_onnxparser(apex)
    assert trt_parser
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.report_parsing_info()
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    assert trt_network

    # create infer builder (max_batch_size and max_workspace_size are module-level settings)
    trt_builder = trt.infer.create_infer_builder(G_LOGGER)
    trt_builder.set_max_batch_size(max_batch_size)
    trt_builder.set_max_workspace_size(max_workspace_size)

    if apex.get_model_dtype() == trt.infer.DataType_kHALF:
        print("-------------------  Running FP16 -----------------------------")
        trt_builder.set_half2_mode(True)
    elif apex.get_model_dtype() == trt.infer.DataType_kINT8:
        print("-------------------  Running INT8 -----------------------------")
        trt_builder.set_int8_mode(True)
    else:
        print("-------------------  Running FP32 -----------------------------")

    print("----- Builder is Done -----")
    print("----- Creating Engine -----")
    trt_engine = trt_builder.build_cuda_engine(trt_network)
    print("----- Engine is built -----")
    time_inference(trt_engine, bs, inp)
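
convert_to_datatype is called here and in Example 8 but not shown; a plausible sketch that maps the requested bit depth onto the legacy trt.infer enums (treat the fallback behavior as an assumption):

def convert_to_datatype(v):
    # map a bit depth (8/16/32) to the legacy TensorRT datatype enum
    if v == 8:
        return trt.infer.DataType_kINT8
    elif v == 16:
        return trt.infer.DataType_kHALF
    elif v == 32:
        return trt.infer.DataType_kFLOAT
    else:
        print("ERROR: Invalid model data type bit depth: " + str(v))
        return trt.infer.DataType_kINT8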
Example 7
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import data_flow_ops
import tensorrt as trt

from tensorrt.parsers import onnxparser

# create config object
apex = onnxparser.create_onnxconfig()
apex.set_model_file_name("../model/onnx/hep_frozen_bs_64.onnx")
apex.set_model_dtype(trt.infer.DataType.FLOAT)
apex.set_print_layer_info(True)

# create parser
trt_parser = onnxparser.create_onnxparser(apex)
data_type = apex.get_model_dtype()
onnx_filename = apex.get_model_file_name()

# parse the model
trt_parser.parse(onnx_filename, data_type)

# retrieve the network from the parser
trt_parser.convert_to_trtnetwork()
trt_network = trt_parser.get_trtnetwork()
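
The snippet stops after retrieving the network; a minimal continuation following the pattern of the other examples on this page (builder settings and the output file name are illustrative):

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
builder = trt.infer.create_infer_builder(G_LOGGER)
builder.set_max_batch_size(64)           # matches the bs_64 model name (assumption)
builder.set_max_workspace_size(1 << 20)
engine = builder.build_cuda_engine(trt_network)
modelstream = engine.serialize()
trt.utils.write_engine_to_file("hep_frozen_bs_64.engine", modelstream)  # hypothetical output path
engine.destroy()
builder.destroy()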
Example 8
def sample_onnx_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file_format", default="ascii", choices=["ascii", "ppm"], type=str, help="input file format. ASCII if not specified.")
    parser.add_argument("-i", "--image_file", type=str, required=True, help="Image or ASCII file")
    parser.add_argument("-r", "--reference_file", type=str, required=True, help="Reference files with correct labels")
    parser.add_argument("-k", "--topK", type=str, required=True, help="Top K values predictions to print")
    parser.add_argument("-m", "--model_file", type=str, required=True, help="ONNX Model file")
    parser.add_argument("-d", "--data_type", default=32, type=int, choices=[8, 16, 32], help="Supported data type i.e. 8, 16, 32 bit")
    parser.add_argument("-b", "--max_batch_size", default=32, type=int, help="Maximum batch size")
    parser.add_argument("-w", "--max_workspace_size", default=1024*1024, type=int, help="Maximum workspace size")
    parser.add_argument("-v", "--add_verbosity", action="store_true")
    parser.add_argument("-q", "--reduce_verbosity", action="store_true")
    parser.add_argument("-l", "--print_layer_info", action="store_true")
    args = parser.parse_args()

    file_format = args.file_format.strip()
    image_file = args.image_file.strip()
    reference_file = args.reference_file.strip()
    model_file = args.model_file.strip()
    topK = args.topK
    max_batch_size = args.max_batch_size
    max_workspace_size = args.max_workspace_size
    data_type = args.data_type
    add_verbosity = args.add_verbosity
    reduce_verbosity = args.reduce_verbosity
    print_layer_info = args.print_layer_info

    print("Input Arguments: ")
    print("file_format", file_format)
    print("image_file", image_file)
    print("reference_file",reference_file)
    print("topK", str(topK))
    print("model_file",model_file)
    print("data_type",data_type)
    print("max_workspace_size",max_workspace_size)
    print("max_batch_size",max_batch_size)
    print("add_verbosity", add_verbosity)
    print("reduce_verbosity", reduce_verbosity)
    print("print_layer_info",print_layer_info)

    # Create onnx_config
    apex = onnxparser.create_onnxconfig()
    apex.set_model_file_name(model_file)
    apex.set_model_dtype(convert_to_datatype(data_type))
    if print_layer_info:
        apex.set_print_layer_info(True)
    if add_verbosity:
        apex.add_verbosity()
    if reduce_verbosity:
        apex.reduce_verbosity()

    # set batch size
    batch_size = 1

    # create parser
    trt_parser = onnxparser.create_onnxparser(apex)
    assert trt_parser
    data_type = apex.get_model_dtype()
    onnx_filename = apex.get_model_file_name()
    trt_parser.parse(onnx_filename, data_type)
    trt_parser.report_parsing_info()
    trt_parser.convert_to_trtnetwork()
    trt_network = trt_parser.get_trtnetwork()
    assert trt_network

    # create infer builder
    trt_builder = trt.infer.create_infer_builder(G_LOGGER)
    trt_builder.set_max_batch_size(max_batch_size)
    trt_builder.set_max_workspace_size(max_workspace_size)

    if apex.get_model_dtype() == trt.infer.DataType_kHALF:
        print("-------------------  Running FP16 -----------------------------")
        trt_builder.set_fp16_mode(True)
    elif apex.get_model_dtype() == trt.infer.DataType_kINT8:
        print("Int8 Model not supported")
        sys.exit()
    else:
        print("-------------------  Running FP32 -----------------------------")

    print("----- Builder is Done -----")
    print("----- Creating Engine -----")
    trt_engine = trt_builder.build_cuda_engine(trt_network)
    print("----- Engine is built -----")

    # create input vector
    input_img = prepare_input(image_file, trt_engine, file_format)

    if input_img.size == 0:
        msg = "sampleONNX: the input tensor is of zero size - please check your path to the input or the file type"
        G_LOGGER.log(trt.infer.Logger.Severity_kERROR, msg)

    trt_context = trt_engine.create_execution_context()
    output = inference_image(trt_context, input_img, batch_size)

    # post processing stage
    process_output(output, file_format, reference_file, topK)

    # clean up
    trt_parser.destroy()
    trt_network.destroy()
    trt_context.destroy()
    trt_engine.destroy()
    trt_builder.destroy()
    print("&&&& PASSED Onnx Parser Tested Successfully")