Example #1
def generate_engine(uff_file, G_LOGGER):
    parser = uffparser.create_uff_parser()
    parser.register_input("input_1", (3, 416, 416), 0)
    parser.register_output("conv2d_23/BiasAdd")
    engine = trt.utils.uff_file_to_trt_engine(G_LOGGER, uff_file, parser, 1,
                                              1 << 30)
    return engine
    def create_graph(self):
        uff_model = uff.from_tensorflow_frozen_model(
            self.facenet, ['InceptionResnetV2/Bottleneck/BatchNorm/Reshape_1'],
            list_nodes=False)

        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        parser.register_input('input_image', (3, 160, 160), 0)
        parser.register_output(
            'InceptionResnetV2/Bottleneck/BatchNorm/Reshape_1')

        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1,
                                             1 << 31)

        parser.destroy()

        runtime = trt.infer.create_infer_runtime(G_LOGGER)
        self.context = engine.create_execution_context()

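        # Host/device buffers: one 3x160x160 float32 input image and a 128-dim float32 embedding output.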
        self.output = np.empty((1, 128), dtype=np.float32)
        self.d_input = cuda.mem_alloc(1 * 160 * 160 * 3 * 4)
        self.d_output = cuda.mem_alloc(1 * 128 * 4)

        self.bindings = [int(self.d_input), int(self.d_output)]
        print('here')
        self.stream = cuda.Stream()
Example #3
def main():
    # generate test case for our engine
    img_input = DATA + '/VOC2012/JPEGImages/2008_000016.jpg'
    img, img_id, img_w, img_h = get_testcase(img_input)  #img in ppm format

    # convert model to UFF
    uff_model = uff.from_tensorflow_frozen_model(
        '/tiny-yolo-voc/tiny-yolo-graph-tf17.pb', ["22-convolutional"])

    # convert model to TensorRT model
    model_parser = uffparser.create_uff_parser()
    model_parser.register_input("input", (3, 416, 416),
                                0)  #input name, input dims, input order
    model_parser.register_output("22-convolutional")

    # create engine, context, and runtime
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, model_parser,
                                         MAX_BATCH_SIZE, MAX_WORKSPACE)

    assert (engine)

    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    context = engine.create_execution_context()
    context.set_profiler(G_PROFILER)

    if (TIMEIT):
        time_inference(context, engine, 1)
    else:
        if (VALIDATE):
            f = open("/tiny-yolo-voc/2012_val.txt", "r")
            for image_path in f:
                image_path = image_path.strip()
                image_jpg = image_path.split("/")[-1]
                img_input = DATA + '/VOC2012/JPEGImages/' + image_jpg
                img, img_id, img_w, img_h = get_testcase(img_input)
                out = infer(
                    context, img, OUTPUT_SIZE, 1
                )  # infer() uses context.enqueue(): asynchronous execution on a CUDA stream; TensorRT does not currently support profiling this path

                # parse output
                output_parser = yoloparser.yolov2parser(
                    out, output_wd, nclass, nbox, class_name, biases)
                result = output_parser.interpret(threshold, nms, img_w, img_h)
                save_results(img_input, result, img_w, img_h, img_id,
                             "/tiny-yolo-voc/results/")
        else:
            out = infer(
                context, img, OUTPUT_SIZE, 1
            )  # infer() uses context.enqueue(): asynchronous execution on a CUDA stream; TensorRT does not currently support profiling this path

            # parse output
            output_parser = yoloparser.yolov2parser(out, output_wd, nclass,
                                                    nbox, class_name, biases)
            result = output_parser.interpret(threshold, nms, img_w, img_h)
            save_results(img_input, result, img_w, img_h, img_id,
                         "/tiny-yolo-voc/results/")

    context.destroy()
    engine.destroy()
    runtime.destroy()
Example #4
def main(args):

    input = [args.input_placeholder]
    output = args.output_placeholders.split(',')

    dims = list(map(int, args.dimensions.split(',')))
    assert (len(dims) == 3), 'Input dimensions must be given in CHW format.'

    # Convert tensorflow pb file to uff stream for tensorRT
    uff_model = uff.from_tensorflow_frozen_model(frozen_file=args.frozen_file,
                                                 input_nodes=input,
                                                 output_nodes=output)

    # Create parser for uff file and register input placeholder
    parser = uffparser.create_uff_parser()
    parser.register_input(args.input_placeholder, dims, uffparser.UffInputOrder_kNCHW)

    # Create a tensorRT engine which is ready for immediate use.
    # For this example, we will serialize it for fast instantiation later.
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         args.max_batch_size, 1 << args.max_workspace_size, trt.infer.DataType.FLOAT)
    assert (engine)

    # Serialize the engine to given file path
    serialize_engine(engine, args.file_path)
    engine.destroy()
Example #5
def main():
    path = os.path.dirname(os.path.realpath(__file__))

    tf_model = lenet5.learn()

    uff_model = uff.from_tensorflow(tf_model, ["fc2/Relu"])
    #Convert Tensorflow model to TensorRT model

    parser = uffparser.create_uff_parser()
    parser.register_input("Placeholder", (1, 28, 28), 0)
    parser.register_output("fc2/Relu")
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCHSIZE, MAX_WORKSPACE)

    assert (engine)

    # parser.destroy()
    context = engine.create_execution_context()

    print("\n| TEST CASE | PREDICTION |")
    for i in range(ITERATIONS):
        img, label = lenet5.get_testcase()
        img = img[0]
        label = label[0]
        out = infer(context, img, 1)
        print("|-----------|------------|")
        print("|     " + str(label) + "     |      " + str(np.argmax(out)) +
              "     |")
Example #6
def main():
    path = dir_path = os.path.dirname(os.path.realpath(__file__))

    #Convert uff model to TensorRT model
    parser = uffparser.create_uff_parser()
    parser.register_input("Input_0", (1, 28, 28), 0)
    parser.register_output("Binary_3")

    engine = trt.utils.uff_file_to_trt_engine(G_LOGGER, MODEL, parser,
                                              MAX_BATCHSIZE, MAX_WORKSPACE,
                                              trt.infer.DataType.FLOAT)

    assert (engine)

    # parser.destroy()

    rand_file = randint(0, 9)
    img = get_testcase(DATA + str(rand_file) + '.pgm')
    data = normalize(img)

    print("Test case: " + str(rand_file))

    out = infer(engine, data, 1)

    print("Prediction: " + str(np.argmax(out)))
Example #7
    def mk_TensorRT_engine(self):
        # Train the model first if no saved checkpoint exists
        if not tf.train.get_checkpoint_state(save_dir):
            self.fit()

        # Load the trained model
        with tf.Session() as sess:
            saver = tf.train.Saver(tf.global_variables())
            saver.restore(sess, "save/model.ckpt")
            graph_def = sess.graph.as_graph_def()
            frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, ["inference/softmax"])
            tf_model = tf.graph_util.remove_training_nodes(frozen_graph)

        # Convert the TensorFlow model to UFF
        uff_model = uff.from_tensorflow(tf_model, ["inference/softmax"])

        # Create a console logger for building the TensorRT engine
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

        # Create a UFF parser and register the model's input/output information
        parser = uffparser.create_uff_parser()
        # (channel, im_size, im_size)
        parser.register_input("Placeholder", (1,28,28), 0)
        parser.register_output("inference/softmax")

        # Build the engine with the utility function (the last arguments are the max batch size and max workspace size)
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, MAX_BATCH_SIZE, MAX_WORKSPACE_SIZE)
        parser.destroy()

        return engine
Example #8
def main():
    args = parse_args()
    height, width, channel = 368, 432, 3
    images = []
    for name in args.images.split(','):
        x = read_imgfile(
            name, width, height,
            'channels_first')  # channels_first is required for tensorRT
        images.append(x)

    model_func = _get_model_func(args.base_model)
    model_inputs, model_outputs = model_func()
    input_names = [p.name[:-2] for p in model_inputs]
    output_names = [p.name[:-2] for p in model_outputs]

    print('input names: %s' % ','.join(input_names))
    print('output names: %s' %
          ','.join(output_names))  # outputs/conf,outputs/paf

    # with tf.Session() as sess:
    sess = tf.InteractiveSession()
    measure(lambda: tl.files.load_and_assign_npz_dict(args.path_to_npz, sess),
            'load npz')
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_names)
    tf_model = tf.graph_util.remove_training_nodes(frozen_graph)
    uff_model = measure(lambda: uff.from_tensorflow(tf_model, output_names),
                        'uff.from_tensorflow')
    print('uff model created')

    parser = uffparser.create_uff_parser()
    inputOrder = 0  # NCHW, https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/_nv_uff_parser_8h_source.html
    parser.register_input(input_names[0], (channel, height, width), inputOrder)
    for name in output_names:
        parser.register_output(name)

    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
    max_batch_size = 1
    max_workspace_size = 1 << 30
    engine = measure(
        lambda: trt.utils.uff_to_trt_engine(
            G_LOGGER, uff_model, parser, max_batch_size, max_workspace_size),
        'trt.utils.uff_to_trt_engine')
    print('engine created')

    f_height, f_width = height // 8, width // 8  # TODO: derive from model_outputs
    post_process = PostProcessor((height, width), (f_height, f_width),
                                 'channels_first')

    for idx, x in enumerate(images):
        conf, paf = measure(lambda: infer(engine, x, 1), 'infer')
        humans, heat_up, paf_up = measure(lambda: post_process(conf, paf),
                                          'post_process')
        print('got %d humans' % (len(humans)))
        plot_humans(x.transpose([1, 2, 0]), heat_up, paf_up, humans,
                    '%02d' % (idx + 1))
Example #9
def uff2engine(frozen_input_name, net_input_shape, frozen_output_name,
               uff_path, engine_path):
    with open(uff_path, 'rb') as f:
        uff_model = f.read()
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        parser.register_input(frozen_input_name, net_input_shape, 0)
        parser.register_output(frozen_output_name)
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1,
                                             1 << 30)
        parser.destroy()
        trt.utils.write_engine_to_file(engine_path, engine.serialize())
def create_and_save_inference_engine():

    INPUT_LAYERS = [config['input_layer']]
    OUTPUT_LAYERS = [config['output_layer']]
    INFERENCE_BATCH_SIZE = config['inference_batch_size']

    INPUT_C = 1
    INPUT_H = config['image_dim']
    INPUT_W = config['image_dim']

    # Load your newly created Tensorflow frozen model and convert it to UFF
    uff_model = uff.from_tensorflow_frozen_model(config['frozen_model_file'],
                                                 OUTPUT_LAYERS)

    # Now that we have a UFF model, we can generate a TensorRT engine by creating a logger for TensorRT.
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    # Create a UFF parser to parse the UFF file created from your TF Frozen model and identify the desired input and output nodes
    parser = uffparser.create_uff_parser()
    parser.register_input(INPUT_LAYERS[0], (INPUT_C, INPUT_H, INPUT_W), 0)
    parser.register_output(OUTPUT_LAYERS[0])

    # Build your TensorRT inference engine
    # This step performs (1) Tensor fusion (2) Reduced precision calibration
    # (3) Target-specific autotuning (4) Tensor memory management

    # Pass the logger, parser, the UFF model stream,
    # and some settings (max batch size and max workspace size)
    # to a utility function that will create the engine for us

    # Build your TensorRT inference engine
    if (config['precision'] == 'fp32'):
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.FLOAT)

    elif (config['precision'] == 'fp16'):
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.HALF)
    elif (config['precision'] == 'int8'):
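        # Note: building in INT8 mode also requires an INT8 calibrator, which is not configured in this example.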
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.INT8)

    # engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 20)

    # Serialize TensorRT engine to a file for when you are ready to deploy your model.
    save_path = str(config['engine_save_dir']) + "tf_model_batch" \
                + str(INFERENCE_BATCH_SIZE) + "_" + str(config['precision']) + ".engine"
    trt.utils.write_engine_to_file(save_path, engine.serialize())
    print("Saved TensorRT engine to {}".format(save_path))
Example #11
def create_and_save_inference_engine():
    # Define network parameters, including inference batch size, name & dimensionality of input/output layers
    INPUT_LAYERS = [config['input_layer']]
    OUTPUT_LAYERS = [config['out_layer']]
    INFERENCE_BATCH_SIZE = config['inference_batch_size']

    INPUT_C = 3
    INPUT_H = config['image_dim']
    INPUT_W = config['image_dim']

    # Load your newly created Tensorflow frozen model and convert it to UFF
    uff_model = uff.from_tensorflow_frozen_model(config['frozen_model_file'], OUTPUT_LAYERS)

    # Create a UFF parser to parse the UFF file created from your TF Frozen model
    parser = uffparser.create_uff_parser()
    parser.register_input(INPUT_LAYERS[0], (INPUT_C,INPUT_H,INPUT_W),0)
    parser.register_output(OUTPUT_LAYERS[0])

    # Build your TensorRT inference engine
    if(config['precision'] == 'fp32'):
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER, 
            uff_model, 
            parser, 
            INFERENCE_BATCH_SIZE, 
            1<<20, 
            trt.infer.DataType.FLOAT
        )

    elif(config['precision'] == 'fp16'):
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER, 
            uff_model, 
            parser, 
            INFERENCE_BATCH_SIZE, 
            1<<20, 
            trt.infer.DataType.HALF
        )
    
    # Serialize TensorRT engine to a file for when you are ready to deploy your model.
    save_path = str(config['engine_save_dir']) + "keras_vgg19_b" \
        + str(INFERENCE_BATCH_SIZE) + "_"+ str(config['precision']) + ".engine"

    trt.utils.write_engine_to_file(save_path, engine.serialize())
    
    print("Saved TRT engine to {}".format(save_path))
Example #12
    def __init__(self, model, batch_size):
        # get Tensorflow graph object from Keras
        with K.get_session() as sess:
            image_batch_t = tf.placeholder(tf.float32,
                                           shape=(None, 1, 28, 28),
                                           name='image_tensor')
            K.set_learning_phase(0)
            conf_t = model(image_batch_t)
            output_names = [conf_t.name[:-2]]
            graphdef = sess.graph.as_graph_def()
            frozen_graph = tf.graph_util.convert_variables_to_constants(
                sess, graphdef, output_names)
            frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)

        # convert TensorRT UFF object
        uff_model = uff.from_tensorflow(frozen_graph, output_names)
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        input_shape = (1, 28, 28)
        parser.register_input("image_tensor", input_shape, 0)
        parser.register_output(output_names[0])

        # create TensorRT inference engine
        engine = trt.utils.uff_to_trt_engine(G_LOGGER,
                                             stream=uff_model,
                                             parser=parser,
                                             max_batch_size=batch_size,
                                             max_workspace_size=1 << 25)
        # datatype='FP32')

        parser.destroy()

        # allocate needed device buffers
        dims = engine.get_binding_dimensions(0).to_DimsCHW()
        nbytes = batch_size * dims.C() * dims.H() * dims.W() * np.dtype(
            np.float32).itemsize
        self.d_src = cuda.mem_alloc(nbytes)

        dims = engine.get_binding_dimensions(1).to_DimsCHW()
        nbytes = batch_size * dims.C() * dims.H() * dims.W() * np.dtype(
            np.float32).itemsize
        self.d_dst = cuda.mem_alloc(nbytes)

        self.engine = engine
        self.ctx = engine.create_execution_context()
        self.batch_size = batch_size
Example #13
    def parse_uff_model(self, uff_model=None, uff_path=None):
        assert uff_model or uff_path, "Must pass in either a UFF model or the path to a UFF model on disk"
        if uff_path:
            with open(uff_path, 'rb') as uff_file:
                uff_model = uff_file.read()
        parser = uffparser.create_uff_parser()
        # input_1
        parser.register_input(self.model_input_name, (3, 224, 224), 0)
        # dense_2/Sigmoid
        parser.register_output(self.model_output_name)
        engine = trt.utils.uff_to_trt_engine(logger=trt_logger,
                                             stream=uff_model,
                                             parser=parser,
                                             max_batch_size=MAX_BATCH_SIZE,
                                             max_workspace_size=MAX_WORKSPACE_SIZE,
                                             datatype=TRT_DATATYPE)
        context = engine.create_execution_context()
        return context
def main():
    tf_freeze_model = 'car_series/frozen_graph.pb'
    input_node = 'input'
    out_node = 'InceptionV4/Logits/Predictions'

    uff_model = uff.from_tensorflow_frozen_model(tf_freeze_model, [out_node])
    #Convert Tensorflow model to TensorRT model
    parser = uffparser.create_uff_parser()
    parser.register_input(input_node, (CHANNEL, INPUT_H, INPUT_W), 0)
    parser.register_output(out_node)

    engine = trt.utils.uff_to_trt_engine(G_LOGGER,
                                         uff_model,
                                         parser,
                                         MAX_BATCHSIZE,
                                         MAX_WORKSPACE)

    trt.utils.write_engine_to_file("car_series/car_series_tensorrt.engine", engine.serialize())
Example #15
def main():
    MAX_WORKSPACE = 1 << 30
    MAX_BATCHSIZE = 1
    # If the output_filename argument is used this returns None; otherwise it returns the serialized UFF model data
    uff_model = uff.from_tensorflow_frozen_model(
        frozen_model_path, frozen_node_name
    )  #, output_filename=UFF_PATH, text=True, list_nodes=True)

    parser = uffparser.create_uff_parser()
    parser.register_input(frozen_input_name, NET_INPUT_IMAGE_SHAPE,
                          0)  # 0 means the input channel order is NCHW, 1 means NHWC
    parser.register_output(frozen_node_name[0])

    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCHSIZE, MAX_WORKSPACE)

    # save engine
    trt.utils.write_engine_to_file(ENGINE_PATH, engine.serialize())

    assert (engine)

    # parser.destroy()
    context = engine.create_execution_context()

    print("\n| TEST CASE | PREDICTION |")
    pair = imgTestData[0]
    correct = 0
    for img, label in pair:
        output = infer(context, img, 1)

        # my frozen graph output is logists , here need convert to softmax
        softmax = np.exp(output) / np.sum(np.exp(output))
        predict = np.argmax(softmax)

        if int(label) == predict:
            correct += 1
        print(
            "|-------|--------|--------------------------------------------------------"
        )
        print("|   " + str(label) + "   |    " + str(predict) + "   |    " +
              str(['{:.2f}%'.format(i * 100) for i in softmax]) + "   ")

    accuracy = correct / len(pair)
    print("Accuracy = ", accuracy)
Example #16
    def _create_engine(self, modelstream, **kwargs):
        '''
        Helper to create engine when trying to build from models
        '''
        self.log_info("Parsing Model from {}".format(self.src_framework))
        if self.src_framework == "uff":
            parser = uffparser.create_uff_parser()
            for k, v in kwargs["input_nodes"].items():
                parser.register_input(k, v, 0)

            for o in kwargs["output_nodes"]:
                parser.register_output(o)

            if modelstream:
                self.engine = trt.utils.uff_to_trt_engine(
                    self.logger,
                    modelstream,
                    parser,
                    self.max_batch_size,
                    self.max_workspace_size,
                    self.data_type,
                    None,  #TODO: Figure out if plugins are supported in UFF
                    kwargs.get("calibrator", None))
            else:
                self.engine = trt.utils.uff_file_to_trt_engine(
                    self.logger,
                    kwargs["path"],
                    parser,
                    self.max_batch_size,
                    self.max_workspace_size,
                    self.data_type,
                    None,  #TODO: Figure out if plugins are supported in UFF
                    kwargs.get("calibrator", None))

            parser.destroy()

        elif self.src_framework == "caffe":
            self.engine = trt.utils.caffe_to_trt_engine(
                self.logger, kwargs["deployfile"], kwargs["modelfile"],
                self.max_batch_size, self.max_workspace_size,
                kwargs["output_nodes"], self.data_type,
                kwargs.get("plugins", None), kwargs.get("calibrator", None))
Example #17
def init_models(use_device, model_file):
    if use_device == "GPU":
        os.environ['CUDA_VISIBLE_DEVICES'] = Config.TEST_GPU_ID
    # load model
    uff_model = open(model_file, 'rb').read()
    parser = uffparser.create_uff_parser()
    parser.register_input("input_images", (3, 768, 768), 0)
    parser.register_output("feature_fusion/concat_3")
    # create inference engine and context (aka session)
    trt_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
    engine = trt.utils.uff_to_trt_engine(
        logger=trt_logger,
        stream=uff_model,
        parser=parser,
        max_batch_size=1,  # 1 sample at a time
        max_workspace_size=1 << 20,  # 1 MB GPU memory workspace
        datatype=trt.infer.DataType.FLOAT
    )  # that's very cool, you can set precision
    context = engine.create_execution_context()
    return context
Example #18
def createTrtFromUFF(modelpath):
    MAX_WORKSPACE = 1 << 30
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

    parser = uffparser.create_uff_parser()
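    # Inputs: encoder/decoder text plus the h/c states of two recurrent layers;
    # outputs return the updated states along with the final prediction.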
    parser.register_input("enc_text", (1, VOC_LEN, 1), 0)
    parser.register_input("dec_text", (1, VOC_LEN, 1), 1)
    parser.register_input("h0_in", (1, DIM, 1), 2)
    parser.register_input("c0_in", (1, DIM, 1), 3)
    parser.register_input("h1_in", (1, DIM, 1), 4)
    parser.register_input("c1_in", (1, DIM, 1), 5)

    parser.register_output("h0_out")
    parser.register_output("c0_out")
    parser.register_output("h1_out")
    parser.register_output("c1_out")
    parser.register_output("final_output")

    engine = trt.utils.uff_file_to_trt_engine(G_LOGGER, modelpath, parser, MAX_BATCHSIZE, MAX_WORKSPACE, trt.infer.DataType.FLOAT)
    print('[ChatBot] Successfully created TensorRT engine from file ' + modelpath)
    return engine
Example #19
def main():
    args = parse_args()
    
    # Convert pb to uff
    uff_model = uff.from_tensorflow_frozen_model(args.pb_path, [args.output_node])

    # Create UFF parser and logger
    parser = uffparser.create_uff_parser()

    INPUT_SIZE = [3 , args.image_size , args.image_size]

    parser.register_input(args.input_node,INPUT_SIZE , 0)
    parser.register_output(args.output_node)
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

    # Convert uff to plan
    if args.calib_images_dir:
        calibration_files = [os.path.join(args.calib_images_dir,i)
                        for i in os.listdir(args.calib_images_dir)]
    else:
        calibration_files = []
    batchstream = ImageBatchStream(args.max_batch_size, calibration_files,INPUT_SIZE)
    int8_calibrator = PythonEntropyCalibrator([args.input_node], batchstream)

    if args.int8:
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER, uff_model, 
            parser, 
            args.max_batch_size, args.max_workspace,
            datatype = trt.infer.DataType.INT8,
            calibrator = int8_calibrator
        )
    else:
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER, uff_model, 
            parser, 
            args.max_batch_size, args.max_workspace
        )
    
    trt.utils.write_engine_to_file(args.engine_path, engine.serialize())
Example #20
    def create_graph(self):
        """"""
        uff_model = uff.from_tensorflow_frozen_model(
            self.model_file, ['InceptionResnetV2/Logits/Predictions'])
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        parser.register_input('input_image', (3, 512, 512), 0)
        parser.register_output('InceptionResnetV2/Logits/Predictions')

        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1,
                                             1 << 32)

        parser.destroy()

        runtime = trt.infer.create_infer_runtime(G_LOGGER)
        self.context = engine.create_execution_context()

        self.output = np.empty(len(self.id2name), dtype=np.float32)
        self.d_input = cuda.mem_alloc(1 * 512 * 512 * 3 * 4)
        self.d_output = cuda.mem_alloc(1 * len(self.id2name) * 4)

        self.bindings = [int(self.d_input), int(self.d_output)]
        self.stream = cuda.Stream()
def create_engine(name,
                  model_path,
                  height,
                  width,
                  input_layer='image',
                  output_layer='Openpose/concat_stage7',
                  half16=False):
    if not os.path.exists(name):
        # Load your newly created Tensorflow frozen model and convert it to UFF
        # import pdb; pdb.set_trace();
        uff_model = uff.from_tensorflow_frozen_model(
            model_path,
            [output_layer])  # , output_filename = 'mobilepose.uff')
        dump = open(name.replace('engine', 'uff'), 'wb')
        dump.write(uff_model)
        dump.close()
        # Create a UFF parser to parse the UFF file created from your TF Frozen model
        parser = uffparser.create_uff_parser()
        parser.register_input(input_layer, (3, height, width), 0)
        parser.register_output(output_layer)

        # Build your TensorRT inference engine
        # This step performs (1) Tensor fusion (2) Reduced precision
        # (3) Target autotuning (4) Tensor memory management
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER,
            uff_model,
            parser,
            1,
            1 << 20,
            datatype=trt.infer.DataType.FLOAT
            if not half16 else trt.infer.DataType.HALF)
        trt.utils.write_engine_to_file(name, engine.serialize())
    else:
        engine = trt.utils.load_engine(G_LOGGER, name)

    return engine
Example #22
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from random import randint  # generate a random test case
from PIL import Image
import time  #import system tools
import os

import uff
import tensorrt as trt
from tensorrt.parsers import uffparser

trt.utils.get_uff_version()
parser = uffparser.create_uff_parser()


def get_uff_required_version(parser):
    return str(parser.get_uff_required_version_major()) + '.' + str(
        parser.get_uff_required_version_minor()) + '.' + str(
            parser.get_uff_required_version_patch())


if trt.utils.get_uff_version() != get_uff_required_version(parser):
    raise ImportError("ERROR: UFF version does not match the version required by TensorRT")
#
STARTER_LEARNING_RATE = 1e-4
BATCH_SIZE = 10
NUM_CLASSES = 10
MAX_STEPS = 1000
    def main():
        train_X=get_data()

        tensorrt_input=train_X.reshape(3,28,28)

        tensorrt_input=tensorrt_input.astype(np.float32)
        X = tf.placeholder("float", shape=[1, 28, 28, 3])
        h_conv1=forward_prop(X)

        # saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)

        tf.train.write_graph(sess.graph_def, '.', 'hellotensor.pbtxt')

        final_result=sess.run(h_conv1,feed_dict={X:train_X})

        # print(final_result)

        #saver.save(sess, './hellotensor.ckpt')

        output_graph_name='./hellotensor.pb'
        output_node_names='Conv2D'

        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names.split(","))
        output_graph_def = tf.graph_util.remove_training_nodes(output_graph_def)

        uff_model = uff.from_tensorflow(output_graph_def, output_nodes=['Conv2D'])
        dump = open('slimConv.uff', 'wb')
        dump.write(uff_model)
        dump.close()

        # with tf.gfile.GFile(output_graph_name, "wb") as f:
        #     f.write(output_graph_def.SerializeToString())

        uff_model = open("/home/dami/TensorRt_test/slimConv.uff", 'rb').read()
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        parser.register_input("Placeholder", (3, 28, 28), 0)
        parser.register_output("Conv2D")

        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 20)

        parser.destroy()

        runtime = trt.infer.create_infer_runtime(G_LOGGER)
        context = engine.create_execution_context()

        dims_data = engine.get_binding_dimensions(0).to_DimsCHW()
        dims_out1 = engine.get_binding_dimensions(1).to_DimsCHW()

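        # Binding 0 is the network input and binding 1 its output; despite the
        # "_out0" naming, _out0/d_out0 serve as the input buffers below.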
        _out0 = np.empty(dims_data.C() * dims_data.H() * dims_data.W(), dtype=np.float32)
        _out1 = np.empty(dims_out1.C() * dims_out1.H() * dims_out1.W(), dtype=np.float32)

        d_out0 = cuda.mem_alloc(1 * dims_data.C() * dims_data.H() * dims_data.W() * _out0.dtype.itemsize)
        d_out1 = cuda.mem_alloc(1 * dims_out1.C() * dims_out1.H() * dims_out1.W() * _out1.dtype.itemsize)

        bindings = [int(d_out0), int(d_out1)]

        stream = cuda.Stream()

        # transfer input data to device
        cuda.memcpy_htod_async(d_out0, tensorrt_input, stream)
        # execute model
        context.enqueue(1, bindings, stream.handle, None)
        # transfer predictions back
        cuda.memcpy_dtoh_async(_out1, d_out1, stream)
        # synchronize threads
        stream.synchronize()

        # re_array=_out1.reshape((13, 13, 32))

        results = final_result
        if _out1.shape != final_result.shape:
            results = final_result.reshape(_out1.shape)

        print(str(compare_arrays(results, _out1)))
        print(sumArray(_out1))
        print(sumArray(results))

        context.destroy()
        engine.destroy()
        runtime.destroy()
def main(unused_argv):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    start_time = time.time()

    if True:
        folder = 'output/'
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                pass

        print(img_path)
        test_image = np.asarray(misc.imread(img_path), dtype=np.float32)
        pp_image = test_image

        print(pp_image.shape)
        x_d = pp_image.shape[0] - tile_size + 1
        y_d = pp_image.shape[1] - tile_size + 1

        # Getting a patch for every pixel in the image, unless separation > 1
        if separation > 1:
            i_sep = int(separation)
            x_values = np.arange(0, x_d, i_sep)
            y_values = np.arange(0, y_d, i_sep)
            if (x_d - 1) % i_sep != 0:
                x_values = np.append(x_values, x_d - 1)
            if (y_d - 1) % i_sep != 0:
                y_values = np.append(y_values, y_d - 1)
        else:
            x_values = np.arange(0, x_d)
            y_values = np.arange(0, y_d)

        #print(x_values, y_values)

        for x in x_values:
            for y in y_values:
                input_pipe_l.append(test_image[x:(x + tile_size),
                                               y:(y + tile_size)])
                #print (str(x) + ':' + str(y) + ' ' + str(x+tile_size) + ':' + str(y+tile_size))

        # input_g follows a shape of (num_patches, 28, 28)
        input_g = np.asarray(input_pipe_l, dtype=np.float32)

        print('Input pipeline constructed.')
        print('Input shape: ' + str(input_g.shape))
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": input_g},
                                                           num_epochs=1,
                                                           shuffle=False)

        # Lana's code for using uff model
        print("====================================")
        start_time = time.time()
        num_tiles = np.size(input_g, 0)
        ### Test code for visual confirmation
        #num_tiles = 4
        img1 = np.ascontiguousarray(test_image[216:244, 216:244])
        img2 = np.ascontiguousarray(test_image[244:272, 216:244])
        img3 = np.ascontiguousarray(test_image[216:244, 244:272])
        img4 = np.ascontiguousarray(test_image[244:272, 244:272])
        imgs = np.array([img1, img2, img3, img4])
        print("Processing " + str(num_tiles) + " tiles")

        ### General inference setup
        uff_model = open('uff_no_reshape.uff', 'rb').read()
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        parser = uffparser.create_uff_parser()
        parser.register_input("Reshape", (1, tile_size, tile_size), 0)
        parser.register_output("output_score/output_relu")
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1,
                                             1 << 20)
        parser.destroy()
        runtime = trt.infer.create_infer_runtime(G_LOGGER)

        # Allocate device memory

        nn_in = np.ascontiguousarray(input_g[0, :, :])
        nn_out = np.empty(tile_size * tile_size * 2, dtype=np.float32)
        d_input = cuda.mem_alloc(1 * nn_in.size * nn_in.dtype.itemsize)
        d_output = cuda.mem_alloc(1 * nn_out.size * nn_out.dtype.itemsize)
        bindings = [int(d_input), int(d_output)]
        stream = cuda.Stream()
        out_imgs_ch1 = np.empty([tile_size, tile_size, num_tiles])
        out_imgs_ch2 = np.empty([tile_size, tile_size, num_tiles])

        for i in range(0, num_tiles):
            if num_tiles == 4:
                nn_in = imgs[i]
            else:
                nn_in = np.ascontiguousarray(input_g[i, :, :])
            context = engine.create_execution_context()
            #d_input = cuda.mem_alloc(1 * img.size * img.dtype.itemsize)
            #d_output = cuda.mem_alloc(1 * output.size * output.dtype.itemsize)
            #bindings = [int(d_input), int(d_output)]
            #stream = cuda.Stream()

            # Transfer input data to device
            cuda.memcpy_htod_async(d_input, nn_in, stream)
            # Execute model
            context.enqueue(1, bindings, stream.handle, None)
            # Transfer predictions back
            cuda.memcpy_dtoh_async(nn_out, d_output, stream)
            # Synchronize threads
            stream.synchronize()
            out_ch1 = np.reshape(nn_out[0:tile_size * tile_size],
                                 (tile_size, tile_size))
            out_ch2 = np.reshape(
                nn_out[tile_size * tile_size:tile_size * tile_size * 2],
                (tile_size, tile_size))
            context.destroy()
            out_imgs_ch1[:, :, i] = out_ch1
            out_imgs_ch2[:, :, i] = out_ch2
            #make_image(out_ch1, folder + img_name + str(i) + "_uff_out.png")

        ### General inference cleanup
        new_engine = trt.utils.load_engine(G_LOGGER, "./tf_mnist.engine")
        engine.destroy()
        new_engine.destroy()
        runtime.destroy()
        current_time = time.time() - start_time
        print("Inference complete. Time elapsed: %f seconds." % current_time)

        out0 = out_imgs_ch1[:, :, 0]
        out1 = out_imgs_ch1[:, :, 1]
        out2 = out_imgs_ch1[:, :, 2]
        out3 = out_imgs_ch1[:, :, 3]

        out_top = np.hstack((out0, out1))
        out_btm = np.hstack((out2, out3))
        out_final = np.vstack((out_top, out_btm))

        make_image(out_final, folder + img_name + "_uff_out_ch1.png")

        out0 = out_imgs_ch2[:, :, 0]
        out1 = out_imgs_ch2[:, :, 1]
        out2 = out_imgs_ch2[:, :, 2]
        out3 = out_imgs_ch2[:, :, 3]

        out_top = np.hstack((out0, out1))
        out_btm = np.hstack((out2, out3))
        out_final = np.vstack((out_top, out_btm))

        make_image(out_final, folder + img_name + "_uff_out_ch2.png")

        print("====================================")
Example #25
    def __init__(self, FLAGS, darknet=None):
        self.ntrain = 0

        if isinstance(FLAGS, dict):
            from ..defaults import argHandler
            newFLAGS = argHandler()
            newFLAGS.setDefaults()
            newFLAGS.update(FLAGS)
            FLAGS = newFLAGS

        self.FLAGS = FLAGS
        if self.FLAGS.tensor:
            with open(self.FLAGS.metaLoad, 'r') as fp:
                self.meta = json.load(fp)
            self.framework = create_framework(self.meta, self.FLAGS)
            MODEL_FILE = '/home/sergey/darkflow/built_graph/tiny-yolo-voc.uff'
            INPUT_NAME = "input"
            INPUT_SHAPE = (3, 416, 416)
            OUTPUT_NAME = "BiasAdd_8"
            TRT_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
            from tensorrt.parsers import uffparser
            #builder = trt.Builder(TRT_LOGGER)
            #network = builder.create_network()
            parser = uffparser.create_uff_parser()
            parser.register_input("input", (3, 416, 416), 0)
            parser.register_output(OUTPUT_NAME)
            #parser.parse(MODEL_FILE, network)
            import uff
            m = uff.from_tensorflow_frozen_model(MODEL_FILE, ["BiasAdd_8"])
            engine = trt.utils.uff_to_trt_engine(TRT_LOGGER, m, parser, 1,
                                                 1 << 20)
            print(engine)
            inputs = []
            outputs = []
            bindings = []
            stream = cuda.Stream()
            for binding in engine:
                size = trt.volume(
                    engine.get_binding_shape(binding)) * engine.max_batch_size
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # Allocate host and device buffers
                host_mem = cuda.pagelocked_empty(size, dtype)
                device_mem = cuda.mem_alloc(host_mem.nbytes)
                # Append the device buffer to device bindings.
                bindings.append(int(device_mem))
                # Append to the appropriate list.
                if engine.binding_is_input(binding):
                    inputs.append(HostDeviceMem(host_mem, device_mem))
                else:
                    outputs.append(HostDeviceMem(host_mem, device_mem))
            context = engine.create_execution_context()
            img = cv2.imread(
                '/home/sergey/darkflow/sample_img/sample_computer.jpg')
            img = self.framework.resize_input(img)
            img = img.transpose(2, 0, 1)
            _, h, w = img.shape
            img = img.ravel()
            np.copyto(inputs[0].host, img)
            [
                cuda.memcpy_htod_async(inp.device, inp.host, stream)
                for inp in inputs
            ]
            # Run inference.
            context.execute_async(batch_size=1,
                                  bindings=bindings,
                                  stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            [
                cuda.memcpy_dtoh_async(out.host, out.device, stream)
                for out in outputs
            ]
            # Synchronize the stream
            stream.synchronize()

            [result] = [out.host for out in outputs]
            print(result)

            file = open('/home/sergey/q.txt', 'w')
            np.savetxt('/home/sergey/q.txt', result.ravel())
            out = np.ndarray(shape=(13, 13, 125), dtype=np.float32)
            out = result.reshape((13, 13, 125))
            boxes = self.framework.findboxes(out)
            threshold = self.FLAGS.threshold
            boxesInfo = list()
            for box in boxes:
                tmpBox = self.framework.process_box(box, h, w, threshold)
                if tmpBox is None:
                    continue
                boxesInfo.append({
                    "label": tmpBox[4],
                    "confidence": tmpBox[6],
                    "topleft": {
                        "x": tmpBox[0],
                        "y": tmpBox[2]
                    },
                    "bottomright": {
                        "x": tmpBox[1],
                        "y": tmpBox[3]
                    }
                })
            print(boxesInfo)
            return
        if self.FLAGS.pbLoad and self.FLAGS.metaLoad:
            self.say('\nLoading from .pb and .meta')
            self.graph = tf.Graph()
            device_name = FLAGS.gpuName \
             if FLAGS.gpu > 0.0 else None
            with tf.device(device_name):
                with self.graph.as_default() as g:
                    self.build_from_pb()
            return

        if darknet is None:
            darknet = Darknet(FLAGS)
            self.ntrain = len(darknet.layers)

        self.darknet = darknet
        args = [darknet.meta, FLAGS]
        self.num_layer = len(darknet.layers)
        self.framework = create_framework(*args)

        self.meta = darknet.meta

        self.say('\nBuilding net ...')
        start = time.time()
        self.graph = tf.Graph()
        device_name = FLAGS.gpuName \
         if FLAGS.gpu > 0.0 else None
        with tf.device(device_name):
            with self.graph.as_default() as g:
                self.build_forward()
                self.setup_meta_ops()
        self.say('Finished in {}s\n'.format(time.time() - start))
Example #26
import tensorrt as trt
import uff
from tensorrt.parsers import uffparser

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)



uff_model = uff.from_tensorflow_frozen_model("final.pb", ["dense_2/Softmax"])

INFERENCE_BATCH_SIZE = 256

parser = uffparser.create_uff_parser()

parser.register_input("conv2d_1_input", (1, 28, 28), 0)
parser.register_output("dense_2/Softmax")

engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, INFERENCE_BATCH_SIZE, 1<<20, trt.infer.DataType.FLOAT)

trt.utils.write_engine_to_file("test_engine.engine", engine.serialize())