Example no. 1
def user(multi_engine,
         run_graph=execute_graph,
         run_calibration=execute_calibration):
  """Example function that converts a graph to TFTRT graph."""
  if multi_engine:
    inp_dims = (2, 3, 7, 5)
    orig_graph = get_multi_engine_graph_def()
  else:
    inp_dims = (100, 24, 24, 2)
    orig_graph = get_simple_graph_def()  # use a frozen graph for inference
  dummy_input = np.random.random_sample(inp_dims)
  # Get optimized graph
  trt_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP32",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2,  # minimum number of nodes in an engine
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=[])
  o1 = run_graph(orig_graph, dummy_input)
  o2 = run_graph(trt_graph, dummy_input)
  o3 = run_graph(trt_graph, dummy_input)
  assert np.array_equal(o1, o2)
  assert np.array_equal(o3, o2)  # sanity check
  fp16_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP16",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2,  # minimum number of nodes in an engine
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=[])
  int8_calib_gdef = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="INT8",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2,  # minimum number of nodes in an engine
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=[])
  o4 = run_graph(fp16_graph, dummy_input)
  _ = run_calibration(int8_calib_gdef, dummy_input)
  int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
  o5 = run_graph(int8_graph, dummy_input)
  print("Is FP32 == FP16? %s (False is possible)" % np.allclose(o1, o4))
  print("Is FP32 == INT8? %s (False is possible)" % np.allclose(o1, o5))
  print("Pass")
Example no. 2
def create_optimized_trt_graph(frozen_path, saving_path, output_node,
                               precision):
    """
    @param frozen_path: location of the original unoptimized frozen_graph.pb
    @param saving_path: where do you want the new .pb to be saved?
    @param output_node: name of the ANN's output node
    @param precision: precision for optimization (e.g. FP16)
    """
    # make sure the output node name carries the tensor suffix ':0'
    if ':0' not in output_node:
        output_node = output_node + ':0'

    alloc_space_TensorRT = 2
    ppgmf = (8 - alloc_space_TensorRT) / 8
    max_workspace_size_bytes = alloc_space_TensorRT * 1000000000

    with gfile.FastGFile(frozen_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        trt_graph = trt.create_inference_graph(
            input_graph_def=graph_def,
            outputs=[output_node],
            max_batch_size=32,
            max_workspace_size_bytes=max_workspace_size_bytes,
            minimum_segment_size=1,
            precision_mode=precision)

        path_new_frozen_pb = saving_path + "/newFrozenModel_TRT_" + precision + ".pb"
        with gfile.FastGFile(path_new_frozen_pb, 'wb') as fp:
            fp.write(trt_graph.SerializeToString())
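
As a hedged usage sketch (not part of the original snippet), the optimized `.pb` written above can be read back and imported into a fresh session; the input and output tensor names passed in are whatever your model actually uses:

import tensorflow as tf

def run_optimized_graph(pb_path, input_name, output_name, batch):
    """Sketch: load a TF-TRT-optimized frozen graph and run one prediction."""
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
        input_tensor = graph.get_tensor_by_name(input_name)
        output_tensor = graph.get_tensor_by_name(output_name)
        with tf.Session(graph=graph) as sess:
            return sess.run(output_tensor, feed_dict={input_tensor: batch})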
Example no. 3
def main():

    # Open frozen.pb
    frozen_graph_def = get_frozen_graph('./TENSORFLOW_FROZEN.pb')

    # output node
    output_nodes = ['tower_0/refine_out/BatchNorm/FusedBatchNorm']

    # TensorRT inference graph
    trt_graph = trt.create_inference_graph(
        frozen_graph_def,
        output_nodes,
        max_batch_size=1,
        max_workspace_size_bytes=(2 << 10) << 20,
        precision_mode='FP16')

    print('TRT graph created')

    # Write 'TRT_FP16.pb'
    write_graph_to_file('TRT_FP16.pb', trt_graph, './')

    # check how many ops of the original frozen model
    all_nodes = len([1 for n in frozen_graph_def.node])
    print("numb. of all_nodes in frozen graph:", all_nodes)

    # check how many ops that is converted to TensorRT engine
    trt_engine_nodes = len(
        [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
    print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)

    all_nodes = len([1 for n in trt_graph.node])
    print("numb. of all_nodes in TensorRT graph:", all_nodes)
Example no. 4
def main():
    if not file_changed(os.path.join(SAVED_MODEL_DIR, FROZEN_GRAPH_NAME)):
        print("Frozen graph not changed, not rebuilding")
        return

    # What model to run from - should be the directory name of an exported trained model
    # Change me to the directory checkpoint files are saved in
    frozen_graph_name = os.path.join(SAVED_MODEL_DIR, FROZEN_GRAPH_NAME)
    if not os.path.isfile(frozen_graph_name):
        print("Frozen graph not found, building...")
        build_frozen_graph(config=CONFIG_FILE,
                           checkpoint=os.path.join(
                               SAVED_MODEL_DIR,
                               MODEL_CHECKPOINT_PREFIX + CHECKPOINT_NUMBER),
                           score_threshold=0.2,
                           batch_size=1)
    else:
        print("Frozen graph found, not rebuilding...")

    # read frozen graph from file
    frozen_graph, input_names, output_names = load_frozen_graph(
        frozen_graph_name)
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 25,
        precision_mode='FP16',  # TODO - FP16 or INT8 for Jetson
        minimum_segment_size=50)

    with open(os.path.join(SAVED_MODEL_DIR, TRT_OUTPUT_GRAPH), 'wb') as f:
        f.write(trt_graph.SerializeToString())
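
`file_changed`, `build_frozen_graph` and `load_frozen_graph` come from the surrounding project and are not shown. A sketch of `load_frozen_graph` is below; the input and output node names are placeholders borrowed from the detection examples elsewhere in this listing, not values from this project:

import tensorflow as tf

# hypothetical node names; substitute the real ones from your exported model
INPUT_NAMES = ['image_tensor']
OUTPUT_NAMES = ['detection_boxes', 'detection_scores',
                'detection_classes', 'num_detections']

def load_frozen_graph(pb_path):
    """Sketch: read the frozen graph and return it with its I/O node names."""
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    return graph_def, INPUT_NAMES, OUTPUT_NAMES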
Example no. 5
def TF_to_TRT():
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.3))) as sess:
        saver = tf.train.import_meta_graph("./tensorRT/model.meta")
        saver.restore(sess, "./tensorRT/model")
        your_outputs = ["fcn21/truediv"]

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names=your_outputs)
        with gfile.FastGFile("./tensorRT/frozen_model.pb", 'wb') as f:
            f.write(frozen_graph.SerializeToString())
        print("Frozen model is successfully stored!")
    trt_graph = trt.create_inference_graph(input_graph_def=frozen_graph,
                                           outputs=your_outputs,
                                           max_batch_size=1,
                                           max_workspace_size_bytes=930000000,
                                           precision_mode=PRECISION)

    with gfile.FastGFile("./tensorRT/" + NAME, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    print("TensorRT model is successfully stored!")
    all_nodes = len([1 for n in frozen_graph.node])
    print("numb. of all_nodes in frozen graph:", all_nodes)
    trt_engine_nodes = len(
        [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
    print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)
    all_nodes = len([1 for n in trt_graph.node])
    print("numb. of all_nodes in TensorRT graph:", all_nodes)
    return trt_graph
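
To run the converted graph returned here it has to be imported into a session; a sketch, assuming the network's input placeholder is named `input_image` (hypothetical, check the actual graph):

import tensorflow as tf

def run_trt_graph(trt_graph, batch):
    """Sketch: import the TF-TRT GraphDef and evaluate the fcn21/truediv output."""
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(trt_graph, name='')
        inp = graph.get_tensor_by_name('input_image:0')  # hypothetical name
        out = graph.get_tensor_by_name('fcn21/truediv:0')
        with tf.Session(graph=graph) as sess:
            return sess.run(out, feed_dict={inp: batch})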
Example no. 6
def get_trt_graph(graph_name, graph_def, precision_mode, output_dir,
                  output_node, batch_size=128, workspace_size=1<<30):
  """Create and save inference graph using the TensorRT library.

  Args:
    graph_name: string, name of the graph to be used for saving.
    graph_def: GraphDef, the Frozen Graph to be converted.
    precision_mode: string, the precision that TensorRT should convert into.
      Options- FP32, FP16, INT8.
    output_dir: string, the path to where files should be written.
    output_node: string, the names of the output node that will
      be returned during inference.
    batch_size: int, the number of examples that will be predicted at a time.
    workspace_size: long, size in bytes that can be used during conversion.

  Returns:
    GraphDef for the TensorRT inference graph.
  """
  trt_graph = trt.create_inference_graph(
      graph_def, [output_node], max_batch_size=batch_size,
      max_workspace_size_bytes=workspace_size,
      precision_mode=precision_mode)

  write_graph_to_file(graph_name, trt_graph, output_dir)

  return trt_graph
Example no. 7
def get_trt_graph(graph_name, graph_def, precision_mode, output_dir,
                  output_node, batch_size=128, workspace_size=2<<10):
  """Create and save inference graph using the TensorRT library.

  Args:
    graph_name: string, name of the graph to be used for saving.
    graph_def: GraphDef, the Frozen Graph to be converted.
    precision_mode: string, the precision that TensorRT should convert into.
      Options- FP32, FP16, INT8.
    output_dir: string, the path to where files should be written.
    output_node: string, the names of the output node that will
      be returned during inference.
    batch_size: int, the number of examples that will be predicted at a time.
    workspace_size: int, size in megabytes that can be used during conversion.

  Returns:
    GraphDef for the TensorRT inference graph.
  """
  trt_graph = trt.create_inference_graph(
      graph_def, [output_node], max_batch_size=batch_size,
      max_workspace_size_bytes=workspace_size<<20,
      precision_mode=precision_mode)

  write_graph_to_file(graph_name, trt_graph, output_dir)

  return trt_graph
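
Unlike Example no. 6, which expects `workspace_size` already in bytes, this variant takes megabytes and shifts the value left by 20 bits (i.e. multiplies by 2^20) before passing it to `max_workspace_size_bytes`; the arithmetic below illustrates the conversion:

# 1024 MB expressed in bytes equals the 1 GiB default of Example no. 6
assert 1024 << 20 == 1 << 30
# the default here, 2 << 10 == 2048 MB, therefore becomes 2 GiB of workspace
assert (2 << 10) << 20 == 2 << 30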
Example no. 8
def build_trt_pb(model_name, pb_path, download_dir='data'):
    """Build TRT model from the original TF model, and save the graph
    into a pb file for faster access in the future.

    The code was mostly taken from the following example by NVIDIA.
    https://github.com/NVIDIA-Jetson/tf_trt_models/blob/master/examples/detection/detection.ipynb
    """
    from tf_trt_models.detection import download_detection_model
    from tf_trt_models.detection import build_detection_graph
    from utils.egohands_models import get_egohands_model

    if 'coco' in model_name:
        config_path, checkpoint_path = \
            download_detection_model(model_name, download_dir)
    else:
        config_path, checkpoint_path = \
            get_egohands_model(model_name)
    frozen_graph_def, input_names, output_names = build_detection_graph(
        config_path=config_path, checkpoint=checkpoint_path)
    trt_graph_def = trt.create_inference_graph(
        input_graph_def=frozen_graph_def,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 26,
        precision_mode='FP16',
        minimum_segment_size=50)
    with open(pb_path, 'wb') as pf:
        pf.write(trt_graph_def.SerializeToString())
Example no. 9
def main(argv):
    del argv  # Unused.

    original_saved_model_dir = FLAGS.saved_model_dir.rstrip('/')
    tensorrt_saved_model_dir = '{}_trt'.format(original_saved_model_dir)

    # Converts `SavedModel` to TensorRT inference graph.
    trt.create_inference_graph(None,
                               None,
                               input_saved_model_dir=original_saved_model_dir,
                               output_saved_model_dir=tensorrt_saved_model_dir)
    print('Model conversion completed.')

    # Gets the image.
    get_image_response = requests.get(FLAGS.image_url)
    number = FLAGS.number
    saved_model_dirs = [original_saved_model_dir, tensorrt_saved_model_dir]
    latencies = {}
    for saved_model_dir in saved_model_dirs:
        with tf.Graph().as_default():
            with tf.Session() as sess:

                # Loads the saved model.
                loader.load(sess, [tag_constants.SERVING], saved_model_dir)
                print('Model loaded {}'.format(saved_model_dir))

                def _run_inf(session=sess, n=1):
                    """Runs inference repeatedly."""
                    for _ in range(n):
                        session.run(FLAGS.model_outputs,
                                    feed_dict={
                                        FLAGS.model_input:
                                        [get_image_response.content]
                                    })

                # Warm-up run so any one-time compilation (e.g. XLA or engine
                # building) is excluded from the timed loop.
                _run_inf(sess, 1)

                start = time.time()
                _run_inf(sess, number)
                end = time.time()
                latencies[saved_model_dir] = end - start

    print('Time to run {} predictions:'.format(number))
    for saved_model_dir, latency in latencies.items():
        print('* {} seconds for {} runs for {}'.format(latency, number,
                                                       saved_model_dir))
Example no. 10
def getINT8CalibGraph(input_file, output_prefix, output, batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getGraph(input_file), output,
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="INT8")  # calibration
  with gfile.FastGFile(output_prefix+'.INT8Calib.pb','wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
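
The graph returned above is only the calibration graph; after it has been run on representative data it still has to be converted into the final INT8 inference graph. A sketch of that step, reusing the `trt` and `gfile` imports assumed by the example and following the pattern of Examples no. 1 and 23:

def convert_calibrated_graph(calib_graph, output_prefix):
  """Sketch: turn a calibrated INT8 graph into the final inference graph."""
  int8_graph = trt.calib_graph_to_infer_graph(calib_graph)
  with gfile.FastGFile(output_prefix + '.INT8.pb', 'wb') as f:
    f.write(int8_graph.SerializeToString())
  return int8_graph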
Example no. 11
def getFP16(input_file, output_prefix, output, batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getGraph(input_file), output,
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="FP16")  # Get optimized graph
  with gfile.FastGFile(output_prefix+'.FP16.pb','wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 12
def getFP16(batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getResnet50(), [ "resnet_v1_50/predictions/Reshape_1"],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="FP16")  # Get optimized graph
  with gfile.FastGFile("resnetV150_TRTFP16.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 13
 def optimize_ocr(self, input_graph_def):
     output_graph_def = trt.create_inference_graph(
         input_graph_def=input_graph_def,
         outputs=['predicted_chars', 'predicted_scores'],
         max_batch_size=1,
         # max_workspace_size_bytes=(2 << 10) << 20,
         precision_mode=self.precision)
     return output_graph_def
Example no. 14
def getINT8CalibGraph(batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getResnet50(), [ "resnet_v1_50/predictions/Reshape_1"],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="INT8")  # calibration
  with gfile.FastGFile("resnetV150_TRTINT8Calib.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 15
def getFP16(batch_size,workspace_size,network,output_nodes):
  trt_graph = trt.create_inference_graph(getResnet50(network),[output_nodes],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="FP16")  # Get optimized graph
  with gfile.FastGFile("resnetV250_TRTFP16_chest.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 16
def getINT8CalibGraph(batch_size,workspace_size,network,output_nodes):
  trt_graph = trt.create_inference_graph(getResnet50(network), [output_nodes],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="INT8")  # calibration
  with gfile.FastGFile("resnetV250_TRTINT8Calib_chest.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 17
def getINT8CalibGraph(batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getResnet50(), [ "resnet_v1_50/predictions/Reshape_1"],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="INT8")  # calibration
  with gfile.FastGFile("resnetV150_TRTINT8Calib.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 18
def getFP16(batch_size=128,workspace_size=1<<30):
  trt_graph = trt.create_inference_graph(getResnet50(), [ "resnet_v1_50/predictions/Reshape_1"],
                                         max_batch_size=batch_size,
                                         max_workspace_size_bytes=workspace_size,
                                         precision_mode="FP16")  # Get optimized graph
  with gfile.FastGFile("resnetV150_TRTFP16.pb",'wb') as f:
    f.write(trt_graph.SerializeToString())
  return trt_graph
Example no. 19
def get_frozen_tftrt_model(bert_config, shape, use_one_hot_embeddings, init_checkpoint):
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True
  output_node_names = ['unstack']

  with tf.Session(config=tf_config) as tf_sess:
    input_ids = tf.placeholder(tf.int32, shape, 'input_ids')
    input_mask = tf.placeholder(tf.int32, shape, 'input_mask')
    segment_ids = tf.placeholder(tf.int32, shape, 'segment_ids')

    (start_logits, end_logits) = create_model(bert_config=bert_config,
                                              is_training=False,
                                              input_ids=input_ids,
                                              input_mask=input_mask,
                                              segment_ids=segment_ids,
                                              use_one_hot_embeddings=use_one_hot_embeddings)


    tvars = tf.trainable_variables()
    (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    tf_sess.run(tf.global_variables_initializer())
    print("LOADED!")
    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      else:
        init_string = ", *NOT_INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

    frozen_graph = tf.graph_util.convert_variables_to_constants(tf_sess, 
            tf_sess.graph.as_graph_def(), output_node_names)

    num_nodes = len(frozen_graph.node)
    print('Converting graph using TensorFlow-TensorRT...')
    import tensorflow.contrib.tensorrt as trt
    frozen_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=output_node_names,
        max_batch_size=FLAGS.predict_batch_size,
        max_workspace_size_bytes=(4096 << 20) - 1000,
        precision_mode = "FP16" if FLAGS.use_fp16 else "FP32",
        minimum_segment_size=4,
        is_dynamic_op=True,
        maximum_cached_engines=1000
    )

    print('Total node count before and after TF-TRT conversion:',
          num_nodes, '->', len(frozen_graph.node))
    print('TRT node count:',
          len([1 for n in frozen_graph.node if str(n.op) == 'TRTEngineOp']))
    
    with tf.gfile.GFile("frozen_modelTRT.pb", "wb") as f:
      f.write(frozen_graph.SerializeToString())      
        
  return frozen_graph
Example no. 20
def optimize_model(args):
    checkpoint = tf.train.get_checkpoint_state(args.model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction = 0.5))) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=True)
        saver.restore(sess, input_checkpoint)
        your_outputs = [args.output_tensors]
        frozen_graph = tf.graph_util.convert_variables_to_constants(sess, tf.get_default_graph().as_graph_def(), output_node_names=your_outputs)
        with gfile.FastGFile('/models/model.pb', 'wb') as f:
            f.write(frozen_graph.SerializeToString())
        print("Frozen model is successfully stored!")


    trt_graph = trt.create_inference_graph(
                    input_graph_def=frozen_graph,
                    outputs=your_outputs,
                    max_batch_size=2,
                    max_workspace_size_bytes=2*(10**9),
                    precision_mode="FP32")

    with gfile.FastGFile('model/tensorrt_model.pb', 'wb') as f:
        f.write(trt_graph.SerializeToString())
    print("tensorRT model is successfully stored!")



    all_nodes = len([1 for n in frozen_graph.node])
    print("no. of nodes in frozen model:", all_nodes)
    trt_engine_nodes = len([1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
    print("no. of TRTEngineOp nodes in trt model:", trt_engine_nodes)
    all_nodes = len([1 for n in trt_graph.node])
    print("no. of nodes in trt model:", all_nodes)


    def read_pb_graph(model):
        with gfile.FastGFile(model, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        return graph_def

    MODEL_PATH = 'model/model.pb'

    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))) as sess:
            trt_graph = read_pb_graph(MODEL_PATH)
            tf.import_graph_def(trt_graph, name='')
            input = sess.graph.get_tensor_by_name(args.input_tensor + ':0')
            output = sess.graph.get_tensor_by_name(args.output_tensors + ':0')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, default="", help="Model folder to export")
    parser.add_argument("--input_tensor", type=str, help="name of input tensors")
    parser.add_argument("--output_tensors", type=str, help="name of output tensors")
    args = parser.parse_args()
    optimize_model(args)
Example no. 21
 def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op):
     """Return trt converted graph."""
     return trt.create_inference_graph(input_graph_def=gdef,
                                       outputs=[OUTPUT_NAME],
                                       max_batch_size=self._input.shape[0],
                                       max_workspace_size_bytes=1 << 25,
                                       precision_mode=precision_mode,
                                       minimum_segment_size=2,
                                       is_dynamic_op=is_dynamic_op)
Example no. 22
    def load_frozen_model(self):
        if (self.model != self.prev_model):
            self.prev_model = self.model
            rospy.loginfo("load a new frozen model {}".format(self.model))
            detection_graph = tf.Graph()
            try:
                trt_graph = tf.GraphDef()
                with tf.gfile.GFile(
                        os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            self.model + "/trt.pb"), "rb") as f:
                    serialized_trt_graph = f.read()
                trt_graph.ParseFromString(serialized_trt_graph)
                rospy.loginfo("loading graph from file")
            except Exception:  # no cached trt.pb yet; build it from the frozen graph
                od_graph_def = tf.GraphDef()
                with tf.gfile.GFile(
                        os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            self.model + "/frozen_inference_graph.pb"),
                        'rb') as fid:
                    serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)

                trt_graph = trt.create_inference_graph(
                    input_graph_def=od_graph_def,
                    outputs=[
                        "detection_boxes:0", "detection_scores:0",
                        "detection_classes:0", "num_detections:0"
                    ],
                    max_batch_size=1,
                    max_workspace_size_bytes=1 << 25,
                    precision_mode="FP32",
                    is_dynamic_op=False,
                    minimum_segment_size=50)

                rospy.loginfo("loading graph from scratch")

                with open(
                        os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            self.model + "/trt.pb"), "wb") as f:
                    f.write(trt_graph.SerializeToString())

            with detection_graph.as_default():

                rospy.loginfo("finish generating tensorrt engine")
                tf.import_graph_def(trt_graph, name='')

                rospy.loginfo("model is loaded!")
            return detection_graph, None, None

        else:

            rospy.loginfo("keep the previous model")
            return self.detection_graph, None, None
Example no. 23
def user(run_graph=execute_graph, run_calibration=execute_calibration):
    """Example function that converts a graph to TFTRT graph."""

    inp_dims = (100, 24, 24, 2)
    dummy_input = np.random.random_sample(inp_dims)
    orig_graph = get_simple_graph_def()  # use a frozen graph for inference
    # Get optimized graph
    trt_graph = trt.create_inference_graph(
        input_graph_def=orig_graph,
        outputs=["output"],
        max_batch_size=inp_dims[0],
        max_workspace_size_bytes=1 << 25,
        precision_mode="FP32",  # TRT Engine precision "FP32","FP16" or "INT8"
        minimum_segment_size=2  # minimum number of nodes in an engine
    )
    o1 = run_graph(orig_graph, dummy_input)
    o2 = run_graph(trt_graph, dummy_input)
    o3 = run_graph(trt_graph, dummy_input)
    assert np.array_equal(o1, o2)
    assert np.array_equal(o3, o2)  # sanity check
    fp16_graph = trt.create_inference_graph(
        input_graph_def=orig_graph,
        outputs=["output"],
        max_batch_size=inp_dims[0],
        max_workspace_size_bytes=1 << 25,
        precision_mode="FP16",  # TRT Engine precision "FP32","FP16" or "INT8"
        minimum_segment_size=2  # minimum number of nodes in an engine
    )
    int8_calib_gdef = trt.create_inference_graph(
        input_graph_def=orig_graph,
        outputs=["output"],
        max_batch_size=inp_dims[0],
        max_workspace_size_bytes=1 << 25,
        precision_mode="INT8",  # TRT Engine precision "FP32","FP16" or "INT8"
        minimum_segment_size=2  # minimum number of nodes in an engine
    )
    o4 = run_graph(fp16_graph, dummy_input)
    _ = run_calibration(int8_calib_gdef, dummy_input)
    int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
    o5 = run_graph(int8_graph, dummy_input)
    assert np.allclose(o1, o4)
    assert np.allclose(o1, o5)
    print("Pass")
Example no. 24
 def get_trt(self, grf, precision):
     re = trt.create_inference_graph(grf, [
         self.out_nd_str[0], self.out_nd_str[1], self.out_nd_str[2],
         self.out_nd_str[3]
     ],
                                     max_batch_size=20,
                                     max_workspace_size_bytes=2 << 10 << 20,
                                     precision_mode=precision,
                                     minimum_segment_size=10)
     return re
Example no. 25
def user(run_graph=execute_graph, run_calibration=execute_calibration):
  """Example function that converts a graph to TFTRT graph."""

  inp_dims = (100, 24, 24, 2)
  dummy_input = np.random.random_sample(inp_dims)
  orig_graph = get_simple_graph_def()  # use a frozen graph for inference
  # Get optimized graph
  trt_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP32",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2  # minimum number of nodes in an engine
  )
  o1 = run_graph(orig_graph, dummy_input)
  o2 = run_graph(trt_graph, dummy_input)
  o3 = run_graph(trt_graph, dummy_input)
  assert np.array_equal(o1, o2)
  assert np.array_equal(o3, o2)  # sanity check
  fp16_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP16",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2  # minimum number of nodes in an engine
  )
  int8_calib_gdef = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="INT8",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2  # minimum number of nodes in an engine
  )
  o4 = run_graph(fp16_graph, dummy_input)
  _ = run_calibration(int8_calib_gdef, dummy_input)
  int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
  o5 = run_graph(int8_graph, dummy_input)
  assert np.allclose(o1, o4)
  assert np.allclose(o1, o5)
  print("Pass")
Example no. 26
 def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op):
   """Return trt converted graph."""
   return trt.create_inference_graph(
       input_graph_def=gdef,
       outputs=[OUTPUT_NAME],
       max_batch_size=self._input.shape[0],
       max_workspace_size_bytes=1 << 25,
       precision_mode=precision_mode,
       minimum_segment_size=2,
       is_dynamic_op=is_dynamic_op)
Example no. 27
def convert_with_tensorrt(args):
  """Function triggered by 'convert tensorrt' command.

  Args:
    args: A namespace parsed from command line.
  """
  # Import here instead of at top, because this will crash if TensorRT is
  # not installed
  from tensorflow.contrib import tensorrt  # pylint: disable=g-import-not-at-top
  tensorrt.create_inference_graph(
      None,
      None,
      max_batch_size=args.max_batch_size,
      max_workspace_size_bytes=args.max_workspace_size_bytes,
      precision_mode=args.precision_mode,
      minimum_segment_size=args.minimum_segment_size,
      is_dynamic_op=args.is_dynamic_op,
      input_saved_model_dir=args.dir,
      input_saved_model_tags=args.tag_set.split(','),
      output_saved_model_dir=args.output_dir)
Example no. 28
def convert_with_tensorrt(args):
    """Function triggered by 'convert tensorrt' command.

  Args:
    args: A namespace parsed from command line.
  """
    # Import here instead of at top, because this will crash if TensorRT is
    # not installed
    from tensorflow.contrib import tensorrt  # pylint: disable=g-import-not-at-top
    tensorrt.create_inference_graph(
        None,
        None,
        max_batch_size=args.max_batch_size,
        max_workspace_size_bytes=args.max_workspace_size_bytes,
        precision_mode=args.precision_mode,
        minimum_segment_size=args.minimum_segment_size,
        is_dynamic_op=args.is_dynamic_op,
        input_saved_model_dir=args.dir,
        input_saved_model_tags=args.tag_set.split(','),
        output_saved_model_dir=args.output_dir)
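
Since the function only reads attributes off `args`, it can also be driven programmatically; a sketch with illustrative values (the paths and tag set are placeholders):

import argparse

args = argparse.Namespace(
    dir='/tmp/my_saved_model',             # placeholder SavedModel path
    output_dir='/tmp/my_saved_model_trt',  # placeholder output path
    tag_set='serve',
    max_batch_size=8,
    max_workspace_size_bytes=1 << 30,
    precision_mode='FP16',
    minimum_segment_size=3,
    is_dynamic_op=True)
convert_with_tensorrt(args)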
Example no. 29
def getFP32(input_graph, out_tensor, precision, batch_size, workspace_size):
    graph_prefix = input_graph.split('.pb')[0]
    output_graph = graph_prefix + "_tftrt_" + precision + ".pb"
    #print("output graph is ", output_graph)
    tftrt_graph = trt.create_inference_graph(
        getFrozenGraph(input_graph), [out_tensor],
        max_batch_size=batch_size,
        max_workspace_size_bytes=workspace_size,
        precision_mode=precision)  # Get optimized graph
    with gfile.FastGFile(output_graph, 'wb') as f:
        f.write(tftrt_graph.SerializeToString())
Example no. 30
 def optimize_rcnn(self, input_graph_def):
     trt_graph = trt.create_inference_graph(
         input_graph_def=input_graph_def,
         outputs=[
             'detection_boxes', 'detection_scores', 'detection_classes',
             'num_detections'
         ],
         max_batch_size=1,
         # max_workspace_size_bytes=(2 << 10) << 20,
         precision_mode=self.precision)
     return trt_graph
Example no. 31
def createOptimizedGraph(model, session, tf):
    outputs = [out.op.name for out in model.outputs]
    frozenGraph = _freeze_session(session, tf, output_names=outputs)
    # print("Model is now frozen...")
    # Possible precision modes: FP32, FP16, INT8
    optimizedGraph = tensorrt.create_inference_graph(
        frozenGraph, [out.op.name for out in model.outputs],
        max_batch_size=MachineSpecificSettings.OPTIMIZED_GRAPH_MAX_BATCH,
        precision_mode='FP32')

    return optimizedGraph, outputs
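
`_freeze_session` is not shown; a common implementation for freezing the current session's graph, offered here as an assumption that matches the call site above, uses `convert_variables_to_constants`:

def _freeze_session(session, tf, output_names=None):
    """Sketch: freeze the session's graph so only constants remain."""
    graph = session.graph
    with graph.as_default():
        return tf.graph_util.convert_variables_to_constants(
            session, graph.as_graph_def(), output_names)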
Example no. 32
	def get_trt_graph(self, mode):
		"""Return trt converted graph."""
		if mode in ["FP32", "FP16", "INT8"]:
			return trt.create_inference_graph(
											input_graph_def=self._original_graph,
											outputs=["output"],
											max_batch_size=self._input.shape[0],
											max_workspace_size_bytes=1<<25,
											precision_mode=mode,	# TRT Engine precision "FP32", "FP16" or "INT8"
											minimum_segment_size=2	# minimum number of nodes in an engine
			)
		return None
Example no. 33
def getRTGraph():
    # Unfortunately it needs a post feb/2019 version of TensorRT to optimize
    # Convolution2DTranspose, i.e., where we spend most of the time...

    g, model = getFrozenGraph()
    output = [x.op.name for x in model.outputs]
    newG = trt.create_inference_graph(input_graph_def=g,
                                      outputs=output,
                                      max_batch_size=1,
                                      max_workspace_size_bytes=400000000,
                                      precision_mode='FP16')
    return newG, model
Example no. 34
def optimizeModel(model_location):
    """
  Creates an optimized TensorRT model from a frozen inference graph.
  """
    graph_def, model, category_index, image_tensor, tensor_dict = loadModelDataFromDir(
        model_location)
    trt_model = trt.create_inference_graph(graph_def,
                                           tensor_dict,
                                           max_batch_size=2,
                                           precision_mode="FP16",
                                           minimum_segment_size=50)
    return trt_model
Example no. 35
 def get_trt_graph(self, mode):
   """Return trt converted graph."""
   if mode in  ["FP32", "FP16", "INT8"]:
     return trt.create_inference_graph(
         input_graph_def=self._original_graph,
         outputs=["output"],
         max_batch_size=self._input.shape[0],
         max_workspace_size_bytes=1 << 25,
         precision_mode=mode,  # TRT Engine precision "FP32","FP16" or "INT8"
         minimum_segment_size=2  # minimum number of nodes in an engine
         )
   return None
Example no. 36
def get_trt_graph(filename, batch_size, workspace_size, precision, output_pb):
    print('Start to optimize graph')
    trt_graph = trt.create_inference_graph(
        get_GraphDef(filename),
        ["resnet_v1_50/predictions/Reshape_1"],
        max_batch_size=batch_size,
        max_workspace_size_bytes=workspace_size,
        precision_mode=precision)  # Get optimized graph

    with gfile.FastGFile(output_pb, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    return trt_graph
Example no. 37
def getFP16(batch_size=64, workspace_size=1 << 30):
    trt_graph = trt.create_inference_graph(
        getGraph(), ["classes"],
        max_batch_size=batch_size,
        max_workspace_size_bytes=workspace_size,
        precision_mode="FP16")
    with gfile.FastGFile(fp16_file, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    if FLAGS.logging_enabled:
        writer = tf.summary.FileWriter(FLAGS.log_dir + "16")
        writer.add_graph(trt_graph)
    return trt_graph
Example no. 38
    def __init__(self, graph, batch_size, precision):
        tftrt_graph = tftrt.create_inference_graph(
            graph.frozen,
            outputs=graph.y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 25,
            precision_mode=precision,
            minimum_segment_size=2)

        opt_graph = copy.deepcopy(graph)
        opt_graph.frozen = tftrt_graph
        super(TftrtEngine, self).__init__(opt_graph)
        self.batch_size = batch_size
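
Presumably `graph` here is a small wrapper object exposing a frozen GraphDef as `graph.frozen` and the output name(s) as `graph.y_name`, with the parent engine class handling session setup; a hypothetical call would then be:

# hypothetical: 'frozen_graph_wrapper' exposes .frozen and .y_name as assumed above
engine = TftrtEngine(frozen_graph_wrapper, batch_size=16, precision='FP16')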
Example no. 39
  def testIncOpPlugin(self):
    inp_dims = (5, 24, 24, 2)
    dummy_input = numpy.ones(inp_dims).astype(numpy.float32)
    orig_graph = self._get_plugin_graph_def()  # graph with plugin node

    # trigger conversion.
    # plugin nodes have been registered during import, converter will be able to
    # create corresponding plugin layer during conversion.
    trt_graph = tensorrt.create_inference_graph(
        input_graph_def=orig_graph,
        outputs=["output"],
        max_batch_size=inp_dims[0],
        max_workspace_size_bytes=1 << 25,
        precision_mode="FP32",
        minimum_segment_size=2)
    o2 = self._run_graph(trt_graph, dummy_input)
    self.assertEqual(35, o2.reshape([-1])[0])
Example no. 40
    # run over real calibration data here, we are mimicking a calibration set of
    # 30 different batches. Use as much calibration data as you want
    for _ in range(30):
      val = sess.run(out, {inp: dumm_inp})
  return val


if "__main__" in __name__:
  inp_dims = (100, 24, 24, 2)
  dummy_input = np.random.random_sample(inp_dims)
  orig_graph = get_simple_graph_def()  # use a frozen graph for inference
  # Get optimized graph
  trt_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP32",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2  # minimum number of nodes in an engine
  )
  o1 = run_graph(orig_graph, dummy_input)
  o2 = run_graph(trt_graph, dummy_input)
  o3 = run_graph(trt_graph, dummy_input)
  assert np.array_equal(o1, o2)
  assert np.array_equal(o3, o2)  # sanity check
  fp16_graph = trt.create_inference_graph(
      input_graph_def=orig_graph,
      outputs=["output"],
      max_batch_size=inp_dims[0],
      max_workspace_size_bytes=1 << 25,
      precision_mode="FP16",  # TRT Engine precision "FP32","FP16" or "INT8"
      minimum_segment_size=2  # minimum number of nodes in an engine
  )
Example no. 41
  return g.as_graph_def()


def run_graph(gdef, dumm_inp):
  gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
  ops.reset_default_graph()
  g = ops.Graph()
  with g.as_default():
    inp, out = importer.import_graph_def(
        graph_def=gdef, return_elements=["input", "output"])
    inp = inp.outputs[0]
    out = out.outputs[0]
  with csess.Session(
      config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess:
    val = sess.run(out, {inp: dumm_inp})
  return val


if "__main__" in __name__:
  inp_dims = (100, 24, 24, 2)
  dummy_input = np.random.random_sample(inp_dims)
  gdef = get_simple_graph_def()
  # Get optimized graph
  trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0])
  o1 = run_graph(gdef, dummy_input)
  o2 = run_graph(trt_graph, dummy_input)
  o3 = run_graph(trt_graph, dummy_input)
  assert np.array_equal(o1, o2)
  assert np.array_equal(o3, o2)  # sanity check
  print("Pass")