Example #1
def tf_to_trt_graph(graph, y_name, batch_size, precision):

    # New code in May 2019, not stable yet
    #converter = trt.TrtGraphConverter(
    #    input_graph_def=graph.frozen, nodes_blacklist=graph.y_name,
    #    max_batch_size=batch_size, max_workspace_size_bytes=1 << 30,
    #    precision_mode=precision)
    #self.tftrt_graph = converter.convert()

    if precision == "INT8":
        calib_graph = trt.create_inference_graph(
            graph,
            outputs=y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 25,
            precision_mode=precision,
            minimum_segment_size=2)
        # Finalize the INT8 calibration graph (calibration data should be run
        # through calib_graph in a session before this step).
        tftrt_graph = trt.calib_graph_to_infer_graph(calib_graph)
    else:
        tftrt_graph = trt.create_inference_graph(
            graph,
            outputs=y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 25,
            precision_mode=precision,
            minimum_segment_size=2)
    return tftrt_graph
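The commented-out block above refers to the converter-class API that appeared around TF 1.14. A minimal sketch of that path, assuming trt.TrtGraphConverter is available and that the caller holds a frozen GraphDef plus a single output node name (argument names follow the commented code; everything else is a placeholder):

from tensorflow.python.compiler.tensorrt import trt_convert as trt

def tf_to_trt_graph_with_converter(frozen_graph_def, y_name, batch_size, precision):
    # Same conversion as above, expressed through the TrtGraphConverter class.
    converter = trt.TrtGraphConverter(
        input_graph_def=frozen_graph_def,
        nodes_blacklist=[y_name],  # output nodes to keep in TensorFlow
        max_batch_size=batch_size,
        max_workspace_size_bytes=1 << 30,
        precision_mode=precision)
    return converter.convert()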
Example #2
def convert(model_path, output_path, tf1, precision, max_workspace_size,
            min_segment_size, saved_model_tags, build, batch_shape):
    if not tf1:
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            max_workspace_size_bytes=max_workspace_size,
            precision_mode=precision,
            minimum_segment_size=min_segment_size)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            input_saved_model_tags=saved_model_tags,
            conversion_params=params)
        try:
            converter.convert()
        except Exception as e:
            raise RuntimeError('{}. Just try passing "--tf1".'.format(e))
        if build or batch_shape[0]:

            def reference_data_gen():
                inp1 = tf.random.normal(shape=batch_shape, dtype=tf.float32)
                inp2 = tf.random.normal(shape=batch_shape, dtype=tf.float32)
                yield (inp1, inp2)

            converter.build(reference_data_gen)
        converter.save(output_saved_model_dir=output_path)
    else:
        trt.create_inference_graph(None,
                                   None,
                                   max_batch_size=1,
                                   max_workspace_size_bytes=max_workspace_size,
                                   precision_mode=precision,
                                   minimum_segment_size=min_segment_size,
                                   is_dynamic_op=True,
                                   input_saved_model_dir=model_path,
                                   input_saved_model_tags=saved_model_tags,
                                   output_saved_model_dir=output_path)
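A hypothetical invocation of the convert() helper above; every value shown (paths, precision, shapes) is a placeholder rather than something taken from the original source:

convert(model_path='my_saved_model',        # placeholder input SavedModel dir
        output_path='my_saved_model_trt',   # placeholder output dir
        tf1=False,                          # take the TrtGraphConverterV2 path
        precision='FP16',
        max_workspace_size=1 << 30,
        min_segment_size=3,
        saved_model_tags=['serve'],
        build=True,                         # pre-build engines with dummy inputs
        batch_shape=[1, 224, 224, 3])       # placeholder shape for both inputs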
Example #3
def create_inference_graph(
    input_graph_def,
    outputs,
    max_batch_size=1,
    max_workspace_size_bytes=trt_convert.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
    precision_mode=trt_convert.TrtPrecisionMode.FP32,
    minimum_segment_size=3,
    is_dynamic_op=False,
    maximum_cached_engines=1,
    cached_engine_batches=None,
    input_saved_model_dir=None,
    input_saved_model_tags=None,
    output_saved_model_dir=None,
    session_config=None):
  return trt_convert.create_inference_graph(
      input_graph_def=input_graph_def,
      outputs=outputs,
      max_batch_size=max_batch_size,
      max_workspace_size_bytes=max_workspace_size_bytes,
      precision_mode=precision_mode,
      minimum_segment_size=minimum_segment_size,
      is_dynamic_op=is_dynamic_op,
      maximum_cached_engines=maximum_cached_engines,
      cached_engine_batches=cached_engine_batches,
      input_saved_model_dir=input_saved_model_dir,
      input_saved_model_tags=input_saved_model_tags,
      output_saved_model_dir=output_saved_model_dir,
      session_config=session_config)
Example #4
    def _TestCreateInferenceGraph(self,
                                  input_saved_model_dir=None,
                                  output_saved_model_dir=None):
        """General method to test trt_convert.create_inference_graph()."""
        input_graph_def = None if input_saved_model_dir else self._GetGraphDef(
        )
        output_graph_def = trt_convert.create_inference_graph(
            input_graph_def, ["output"],
            max_workspace_size_bytes=TrtConvertTest.
            _TRT_MAX_WORKSPACE_SIZE_BYTES,
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            session_config=self._GetConfigProto())
        graph_defs_to_verify = [output_graph_def]
        if output_saved_model_dir is not None:
            saved_model_graph_def = saved_model_utils.get_meta_graph_def(
                output_saved_model_dir, tag_constants.SERVING).graph_def
            self.assertTrue(
                isinstance(saved_model_graph_def, graph_pb2.GraphDef))
            graph_defs_to_verify.append(saved_model_graph_def)

        for graph_def in graph_defs_to_verify:
            node_name_to_op = {node.name: node.op for node in graph_def.node}
            self.assertEqual(
                {
                    "input": "Placeholder",
                    "TRTEngineOp_0": "TRTEngineOp",
                    "output": "Identity"
                }, node_name_to_op)
Example #5
def trt_cfe_test():
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session() as sess:
            # First deserialize your frozen graph:
            with tf.io.gfile.GFile("/home/vilon_tao/Projects/machine-learning/tf20/models/freezed_open_shelf_cfe_model.pb", 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            # Now you can create a TensorRT inference graph from your
            # frozen graph:
            trt_graph = trt.create_inference_graph(
                input_graph_def=graph_def,
                outputs=['loc_conf/loc_conf_concat/concat'],
                max_batch_size=20,
                max_workspace_size_bytes=2 << 10,
                precision_mode="FP32")

            tf.import_graph_def(trt_graph, name='')
            # tf.import_graph_def(graph_def, name='')
            tf_input = sess.graph.get_tensor_by_name('input_1:0')
            tf_output = sess.graph.get_tensor_by_name('loc_conf/loc_conf_concat/concat:0')
            start_time = time.time()
            for i in range(100):
                predictions = sess.run(tf_output,
                                      feed_dict={tf_input: np.random.rand(8, 512, 512, 3).astype(dtype=np.float32)})
            print('TRT inference with 8 * 512 * 512 * 3 cost: {} ms.'.format(1000 * (time.time() - start_time)/100))
Example #6
def create_inference_graph(input_graph_def,
                           outputs,
                           max_batch_size=1,
                           max_workspace_size_bytes=trt_convert.
                           DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
                           precision_mode=trt_convert.TrtPrecisionMode.FP32,
                           minimum_segment_size=3,
                           is_dynamic_op=False,
                           maximum_cached_engines=1,
                           cached_engine_batches=None,
                           input_saved_model_dir=None,
                           input_saved_model_tags=None,
                           output_saved_model_dir=None,
                           session_config=None):
    return trt_convert.create_inference_graph(
        input_graph_def=input_graph_def,
        outputs=outputs,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        cached_engine_batches=cached_engine_batches,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        output_saved_model_dir=output_saved_model_dir,
        session_config=session_config)
Example #7
  def _TestCreateInferenceGraph(self,
                                input_saved_model_dir=None,
                                output_saved_model_dir=None):
    """General method to test trt_convert.create_inference_graph()."""
    input_graph_def = None if input_saved_model_dir else self._GetGraphDef()
    output_graph_def = trt_convert.create_inference_graph(
        input_graph_def, ["output"],
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())
    graph_defs_to_verify = [output_graph_def]
    if output_saved_model_dir is not None:
      saved_model_graph_def = saved_model_utils.get_meta_graph_def(
          output_saved_model_dir, tag_constants.SERVING).graph_def
      self.assertTrue(isinstance(saved_model_graph_def, graph_pb2.GraphDef))
      graph_defs_to_verify.append(saved_model_graph_def)

    for graph_def in graph_defs_to_verify:
      node_name_to_op = {node.name: node.op for node in graph_def.node}
      self.assertEqual({
          "input": "Placeholder",
          "TRTEngineOp_0": "TRTEngineOp",
          "output": "Identity"
      }, node_name_to_op)
Example #8
def create_inference_graph(input_graph_def,
                           outputs,
                           max_batch_size=1,
                           max_workspace_size_bytes=2 << 20,
                           precision_mode=trt_convert.TrtPrecisionMode.FP32,
                           minimum_segment_size=3,
                           is_dynamic_op=False,
                           maximum_cached_engines=1,
                           cached_engine_batches=None,
                           use_calibration=True,
                           input_saved_model_dir=None,
                           input_saved_model_tags=None,
                           output_saved_model_dir=None,
                           session_config=None):
    return trt_convert.create_inference_graph(
        input_graph_def=input_graph_def,
        outputs=outputs,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        cached_engine_batches=cached_engine_batches,
        use_calibration=use_calibration,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        output_saved_model_dir=output_saved_model_dir,
        session_config=session_config)
Example #9
def frozen_graph_trt(
    input_frozen_graph_path,
    output_dir,
    max_batch_size,
    precision_mode,
    is_dynamic_op):
    '''
    create a TensorRT inference graph from a Frozen Graph
    '''
    output_node_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_frozen_graph_path = os.path.join(output_dir, 'trt_frozen_graph.pb')
    with tf.io.gfile.GFile(input_frozen_graph_path, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    trt_graph = trt.create_inference_graph(
        input_graph_def=graph_def,
        outputs=output_node_names,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
        precision_mode=precision_mode,
        is_dynamic_op=is_dynamic_op)

    with open(output_frozen_graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
Example #10
def GetINT8(graph, graph_def, nodes_list):
    with graph.as_default():
        trt_graph = trt.create_inference_graph(
            graph_def,
            nodes_list,
            precision_mode='INT8',
            max_workspace_size_bytes=1 << 30,
            max_batch_size=32)
    return trt_graph
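A hedged follow-up sketch, not part of the original example: with the TF 1.x API, the graph returned for precision_mode='INT8' is a calibration graph, and is usually finalized with calib_graph_to_infer_graph after calibration data has been run through it (as Example #1 does). graph, graph_def and nodes_list below are whatever the caller already has:

calib_graph = GetINT8(graph, graph_def, nodes_list)
# ... feed representative batches through calib_graph in a tf.Session ...
int8_graph = trt.calib_graph_to_infer_graph(calib_graph)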
Example #11
def convert_with_tensorrt(args):
  """Function triggered by 'convert tensorrt' command.

  Args:
    args: A namespace parsed from command line.
  """
  # Import here instead of at top, because this will crash if TensorRT is
  # not installed
  from tensorflow.python.compiler.tensorrt import trt_convert  # pylint: disable=g-import-not-at-top
  trt_convert.create_inference_graph(
      None,
      None,
      max_batch_size=args.max_batch_size,
      max_workspace_size_bytes=args.max_workspace_size_bytes,
      precision_mode=args.precision_mode,
      minimum_segment_size=args.minimum_segment_size,
      is_dynamic_op=args.is_dynamic_op,
      input_saved_model_dir=args.dir,
      input_saved_model_tags=args.tag_set.split(','),
      output_saved_model_dir=args.output_dir)
Example #12
def trt_classfication_test():
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            # First deserialize your frozen graph:
            with tf.io.gfile.GFile("./models/trt_model/classfication.pb", 'rb') as f:
            # with tf.io.gfile.GFile("/home/vilon_tao/Projects/machine-learning/tf20/models/freezed_open_shelf_resenet_model.pb", 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            # count how many ops in frozen model

            print('before trt:>>>>>>>>>>>>>>>>>')
            for n in graph_def.node:
                print(n.op)
            trt_engine_ops = len([1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
            print("numb. of trt_engine_ops in frozen_graph:", trt_engine_ops)
            all_ops = len([1 for n in graph_def.node])
            print("numb. of all_ops in frozen_graph:", all_ops)
            # Now you can create a TensorRT inference graph from your
            # frozen graph:
            trt_graph = trt.create_inference_graph(
                input_graph_def=graph_def,
                outputs=['embeddings/fc_512/BiasAdd'],
                max_batch_size=20,
                # is_dynamic_op=True,
                # maximum_cached_engines=20,
                # cached_engine_batches=[20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
                max_workspace_size_bytes=2 << 11,
                precision_mode="FP32")

            # count how many ops in trt_graph
            print('after trt:>>>>>>>>>>>>>>>>>')
            for n in trt_graph.node:
                print(n.op)

            trt_engine_ops = len([1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
            print("numb. of trt_engine_ops in trt_graph", trt_engine_ops)
            all_ops = len([1 for n in trt_graph.node])
            print("numb. of all_ops in trt_graph:", all_ops)

            # graph_io.write_graph(trt_graph, './models/trt_model/', 'classfication.pb', as_text=False)
            # tf.import_graph_def(graph_def, name='')
            tf.import_graph_def(trt_graph, name='')

            tf_input = sess.graph.get_tensor_by_name('input_1:0')

            tf_output = sess.graph.get_tensor_by_name('embeddings/fc_512/BiasAdd:0')
            start_time = time.time()
            for i in range(100):
                embeddings = sess.run(tf_output,
                                      feed_dict={tf_input: np.random.rand(8, 96, 96, 3).astype(dtype=np.float32)})
            print('TRT inference with 8 * 96 * 96 * 3 cost: {} ms.'.format(1000 * (time.time() - start_time)/100))
Example #13
def export(saved_model_dir,
           tensorrt_model_dir,
           max_batch_size=1,
           max_workspace_size_bytes=2 << 20,
           precision_mode='FP16',
           minimum_segment_size=3,
           is_dynamic_op=False,
           maximum_cached_engines=1):
    """Exports TensorRT model."""
    trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        input_saved_model_dir=saved_model_dir,
        input_saved_model_tags=None,
        input_saved_model_signature_key=None,
        output_saved_model_dir=tensorrt_model_dir)
Example #14
def convert_with_tensorrt(args):
    """Function triggered by 'convert tensorrt' command.

    Args:
      args: A namespace parsed from command line.
    """
    # Import here instead of at top, because this will crash if TensorRT is
    # not installed
    from tensorflow.python.compiler.tensorrt import trt_convert as trt  # pylint: disable=g-import-not-at-top

    if not args.convert_tf1_model:
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            max_workspace_size_bytes=args.max_workspace_size_bytes,
            precision_mode=args.precision_mode,
            minimum_segment_size=args.minimum_segment_size)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=args.dir,
            input_saved_model_tags=args.tag_set.split(','),
            conversion_params=params)
        try:
            converter.convert()
        except Exception as e:
            raise RuntimeError(
                '{}. Try passing "--convert_tf1_model=True".'.format(e))
        converter.save(output_saved_model_dir=args.output_dir)
    else:
        trt.create_inference_graph(
            None,
            None,
            max_batch_size=1,
            max_workspace_size_bytes=args.max_workspace_size_bytes,
            precision_mode=args.precision_mode,
            minimum_segment_size=args.minimum_segment_size,
            is_dynamic_op=True,
            input_saved_model_dir=args.dir,
            input_saved_model_tags=args.tag_set.split(','),
            output_saved_model_dir=args.output_dir)
Example #15
def export(saved_model_dir,
           tensorrt_model_dir,
           max_batch_size=1,
           max_workspace_size_bytes=2 << 20,
           precision_mode='FP16',
           minimum_segment_size=3,
           is_dynamic_op=False,
           maximum_cached_engines=1):
    """Exports TensorRT model."""
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        input_saved_model_dir=saved_model_dir,
        input_saved_model_tags=None,
        input_saved_model_signature_key=None,
        output_saved_model_dir=tensorrt_model_dir,
        session_config=config)
Example #16
def convert_graphdef(model_file, output_layer, output_file):
    graph = tf.Graph()
    graph_def = tf.compat.v1.GraphDef()

    with open(model_file, "rb") as f:
        graph_def.ParseFromString(f.read())

    with graph.as_default():
        tf.import_graph_def(graph_def)
        trt_graph = trt.create_inference_graph(input_graph_def=graph_def,
                                               outputs=[output_layer],
                                               precision_mode='FP16')

        tf.io.write_graph(trt_graph, '/tmp/', output_file, as_text=False)

    return trt_graph
Example #17
  def testCreateInferenceGraph_MinimumSegmentSize(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    output_graph_def = trt_convert.create_inference_graph(
        self._GetGraphDef(), ["output"],
        minimum_segment_size=5,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=False)
    node_name_to_op = {node.name: node.op for node in output_graph_def.node}
    self.assertEqual({
        "v1/read": "Const",
        "input": "Placeholder",
        "add": "Add",
        "mul": "Mul",
        "add_1": "Add",
        "output": "Identity"
    }, node_name_to_op)
Example #18
  def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
    """Get the frozen mnist GraphDef.

    Args:
      use_trt: whether use TF-TRT to convert the graph.
      max_batch_size: the max batch size to apply during TF-TRT conversion.
      model_dir: the model directory to load the checkpoints.

    Returns:
      The frozen mnist GraphDef.
    """
    graph = ops.Graph()
    with self.session(graph=graph) as sess:
      with graph.device('/GPU:0'):
        x = array_ops.placeholder(
            shape=(None, 28, 28, 1), dtype=dtypes.float32, name=INPUT_NODE_NAME)
        self._BuildGraph(x)
      # Load weights
      mnist_saver = saver.Saver()
      checkpoint_file = latest_checkpoint(model_dir)
      mnist_saver.restore(sess, checkpoint_file)
      # Freeze
      graph_def = graph_util.convert_variables_to_constants(
          sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
    # Convert with TF-TRT
    if use_trt:
      logging.info('Number of nodes before TF-TRT conversion: %d',
                   len(graph_def.node))
      graph_def = trt_convert.create_inference_graph(
          graph_def,
          outputs=[OUTPUT_NODE_NAME],
          max_batch_size=max_batch_size,
          precision_mode='INT8',
          # There is a 2GB GPU memory limit for each test, so we set
          # max_workspace_size_bytes to 256MB to leave enough room for TF
          # runtime to allocate GPU memory.
          max_workspace_size_bytes=1 << 28,
          minimum_segment_size=2,
          use_calibration=False,
      )
      logging.info('Number of nodes after TF-TRT conversion: %d',
                   len(graph_def.node))
      num_engines = len(
          [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
      self.assertEqual(1, num_engines)
    return graph_def
Example #19
  def testCreateInferenceGraph_MinimumSegmentSize(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    output_graph_def = trt_convert.create_inference_graph(
        self._GetGraphDef(), ["output"],
        minimum_segment_size=5,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=False)
    node_name_to_op = {node.name: node.op for node in output_graph_def.node}
    self.assertEqual({
        "v1/read": "Const",
        "input": "Placeholder",
        "add": "Add",
        "mul": "Mul",
        "add_1": "Add",
        "output": "Identity"
    }, node_name_to_op)
Example #20
    def testCreateInferenceGraph_DynamicOp(self):
        if not is_tensorrt_enabled():
            return
        enable_test_value()

        tmp_dir = self.get_temp_dir()
        input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
        output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
        self._WriteInputSavedModel(input_saved_model_dir)
        output_graph_def = trt_convert.create_inference_graph(
            None,
            None,
            max_workspace_size_bytes=TrtConvertTest.
            _TRT_MAX_WORKSPACE_SIZE_BYTES,
            is_dynamic_op=True,
            maximum_cached_engines=2,
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            session_config=self._GetConfigProto())

        # Test the output GraphDef.
        with ops.Graph().as_default():
            importer.import_graph_def(output_graph_def, name="")
            with self.test_session(config=self._GetConfigProto()) as sess:
                # Run with batch size 1, a new engine is created and cached.
                self._TestRun(sess, 1, True)
                # Run with batch size 2, a new engine is created and cached.
                self._TestRun(sess, 2, True)
                # Run with batch size 3, since the number of cached engines has reached
                # the max, it should evict an old engine and create a new one.
                self._TestRun(sess, 3, True)

        # Test the output SavedModel
        with ops.Graph().as_default():
            with self.test_session(config=self._GetConfigProto()) as sess:
                loader.load(sess, [tag_constants.SERVING],
                            output_saved_model_dir)
                # Run with batch size 1, a new engine is created and cached.
                self._TestRun(sess, 1, True)
                # Run with batch size 2, a new engine is created and cached.
                self._TestRun(sess, 2, True)
                # Run with batch size 3, since the number of cached engines has reached
                # the max, it should evict an old engine and create a new one.
                self._TestRun(sess, 3, True)
Example #21
  def _GetTrtGraphDef(self, run_params, graph_state, gdef):
    """Return trt converted graphdef."""
    params = self._GetParamsCached()
    conversion_params = self.GetConversionParams(run_params)
    logging.info(conversion_params)

    config_for_trt = self._GetConfigProto(run_params, graph_state)
    return trt_convert.create_inference_graph(
        input_graph_def=gdef,
        outputs=params.input_names + params.output_names,
        max_batch_size=conversion_params.max_batch_size,
        max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
        precision_mode=conversion_params.precision_mode,
        minimum_segment_size=conversion_params.minimum_segment_size,
        is_dynamic_op=conversion_params.is_dynamic_op,
        maximum_cached_engines=conversion_params.maximum_cached_engines,
        cached_engine_batches=conversion_params.cached_engine_batches,
        use_calibration=conversion_params.use_calibration,
        session_config=config_for_trt)
Example #23
    def testCreateInferenceGraph_StaticOp(self):
        if not is_tensorrt_enabled():
            return
        enable_test_value()

        tmp_dir = self.get_temp_dir()
        input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
        output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
        self._WriteInputSavedModel(input_saved_model_dir)
        output_graph_def = trt_convert.create_inference_graph(
            None,
            None,
            max_batch_size=1,
            max_workspace_size_bytes=TrtConvertTest.
            _TRT_MAX_WORKSPACE_SIZE_BYTES,
            is_dynamic_op=False,
            maximum_cached_engines=2,  # This is noop, added just for testing.
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            session_config=self._GetConfigProto())

        # Test the output GraphDef.
        with ops.Graph().as_default():
            importer.import_graph_def(output_graph_def, name="")
            with self.test_session(config=self._GetConfigProto()) as sess:
                # Run with batch size 1, the default engine embedded in the graphdef
                # will be used.
                self._TestRun(sess, 1, True)
                # Run with batch size 2, which exceed the max_batch_size, it should fall
                # back to TF function.
                self._TestRun(sess, 2, False)

        # Test the output SavedModel
        with ops.Graph().as_default():
            with self.test_session(config=self._GetConfigProto()) as sess:
                loader.load(sess, [tag_constants.SERVING],
                            output_saved_model_dir)
                # Run with batch size 1, the default engine embedded in the graphdef
                # will be used.
                self._TestRun(sess, 1, True)
                # Run with batch size 2, which exceed the max_batch_size, it should fall
                # back to TF function.
                self._TestRun(sess, 2, False)
Example #24
  def testCreateInferenceGraph_DynamicOp(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=True,
        maximum_cached_engines=2,
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.test_session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1, a new engine is created and cached.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, a new engine is created and cached.
        self._TestRun(sess, 2, True)
        # Run with batch size 3, since the number of cached engines has reached
        # the max, it should evict an old engine and create a new one.
        self._TestRun(sess, 3, True)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.test_session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1, a new engine is created and cached.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, a new engine is created and cached.
        self._TestRun(sess, 2, True)
        # Run with batch size 3, since the number of cached engines has reached
        # the max, it should evict an old engine and create a new one.
        self._TestRun(sess, 3, True)
Example #25
  def testCreateInferenceGraph_StaticOp(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=1,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=False,
        maximum_cached_engines=2,  # This is noop, added just for testing.
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.test_session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1, the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, which exceed the max_batch_size, it should fall
        # back to TF function.
        self._TestRun(sess, 2, False)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.test_session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1, the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, which exceed the max_batch_size, it should fall
        # back to TF function.
        self._TestRun(sess, 2, False)
Example #26
def saved_model_trt(
    input_saved_model_dir, 
    output_dir,
    max_batch_size, 
    precision_mode,
    is_dynamic_op):
    '''
    create a TensorRT inference graph from a SavedModel
    '''
    output_frozen_graph_path = os.path.join(output_dir, 'trt_frozen_graph.pb')
    trt_graph = trt.create_inference_graph(
        input_graph_def=None,
        outputs=None,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=['serve'],
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
        precision_mode=precision_mode,
        output_saved_model_dir=output_dir,
        is_dynamic_op=is_dynamic_op)
    with open(output_frozen_graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
Example #27
#import matplotlib.pyplot as plt
#import matplotlib.patches as patches
import os
import tensorflow as tf
import numpy as np
import time
from tensorflow.python.compiler.tensorrt import trt_convert as trt
# from tf_trt_models.tf_trt_models.detection import download_detection_model, build_detection_graph

IMAGE_PATH = 'data/warriors.jpg'
FROZEN_GRAPH_NAME = 'data/frozen_inference_graph_face.pb'

output_dir = ''
frozen_graph = tf.GraphDef()
with open(os.path.join(output_dir, FROZEN_GRAPH_NAME), 'rb') as f:
    frozen_graph.ParseFromString(f.read())

INPUT_NAME = 'image_tensor'
BOXES_NAME = 'detection_boxes'
CLASSES_NAME = 'detection_classes'
SCORES_NAME = 'detection_scores'
MASKS_NAME = 'detection_masks'
NUM_DETECTIONS_NAME = 'num_detections'

input_names = [INPUT_NAME]
output_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME]

trt_graph = trt.create_inference_graph(input_graph_def=frozen_graph,
                                       outputs=output_names,
                                       max_batch_size=1,
                                       max_workspace_size_bytes=1 << 25,
                                       precision_mode='FP16',
                                       minimum_segment_size=50)
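A hedged follow-up to the snippet above (not in the original): importing the converted graph and running one inference, reusing the INPUT_NAME and output_names constants defined earlier; the dummy image shape and dtype are placeholders:

with tf.Session() as sess:
    tf.import_graph_def(trt_graph, name='')
    image_tensor = sess.graph.get_tensor_by_name(INPUT_NAME + ':0')
    fetches = [sess.graph.get_tensor_by_name(name + ':0') for name in output_names]
    dummy_image = np.zeros((1, 300, 300, 3), dtype=np.uint8)  # placeholder input
    boxes, classes, scores, num_detections = sess.run(
        fetches, feed_dict={image_tensor: dummy_image})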
Example #28
# Read graph def (binary format)
with open(frozen_graph, 'rb') as f:
    frozen_graph_gd = tf.GraphDef()
    frozen_graph_gd.ParseFromString(f.read())
#%%

# If frozen graph is in text format load it like this
# import google.protobuf.text_format
# with open(frozen_graph, 'r') as f:
#     frozen_graph_gd = google.protobuf.text_format.Parse(f.read(), tf.GraphDef())

trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph_gd,  # Pass the parsed graph def here
    outputs=['Binary_Seg/logits_to_softmax'],
    max_batch_size=1,
    # max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50)

tf.io.write_graph(trt_graph,
                  "frozen_models/",
                  "trt_model_101.pb",
                  as_text=False)

tf.io.write_graph(trt_graph,
                  "frozen_models/",
                  "trt_model_101.txt",
                  as_text=True)

# %%
Example #29
    def __init__(self,
                 graph_path,
                 target_size=(320, 240),
                 tf_config=None,
                 trt_bool=False):
        self.target_size = target_size

        # load graph
        logger.info('loading graph from %s(default size=%dx%d)' %
                    (graph_path, target_size[0], target_size[1]))
        with tf.io.gfile.GFile(graph_path, 'rb') as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())

        if trt_bool is True:
            output_nodes = ["Openpose/concat_stage7"]
            graph_def = trt.create_inference_graph(
                graph_def,
                output_nodes,
                max_batch_size=1,
                max_workspace_size_bytes=1 << 20,
                precision_mode="FP16",
                # precision_mode="INT8",
                minimum_segment_size=3,
                is_dynamic_op=True,
                maximum_cached_engines=int(1e3),
                use_calibration=True,
            )

        self.graph = tf.compat.v1.get_default_graph()
        tf.import_graph_def(graph_def, name='TfPoseEstimator')
        self.persistent_sess = tf.compat.v1.Session(graph=self.graph,
                                                    config=tf_config)

        for ts in [
                n.name
                for n in tf.compat.v1.get_default_graph().as_graph_def().node
        ]:
            print(ts)

        self.tensor_image = self.graph.get_tensor_by_name(
            'TfPoseEstimator/image:0')
        self.tensor_output = self.graph.get_tensor_by_name(
            'TfPoseEstimator/Openpose/concat_stage7:0')
        self.tensor_heatMat = self.tensor_output[:, :, :, :19]
        self.tensor_pafMat = self.tensor_output[:, :, :, 19:]
        self.upsample_size = tf.compat.v1.placeholder(dtype=tf.int32,
                                                      shape=(2, ),
                                                      name='upsample_size')
        self.tensor_heatMat_up = tf.compat.v1.image.resize(
            self.tensor_output[:, :, :, :19],
            self.upsample_size,
            align_corners=False,
            name='upsample_heatmat')
        self.tensor_pafMat_up = tf.compat.v1.image.resize(
            self.tensor_output[:, :, :, 19:],
            self.upsample_size,
            align_corners=False,
            name='upsample_pafmat')
        if trt_bool is True:
            smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0, 19)
        else:
            smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0)
        gaussian_heatMat = smoother.get_output()

        max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                          window_shape=(3, 3),
                                          pooling_type='MAX',
                                          padding='SAME')
        self.tensor_peaks = tf.where(
            tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat,
            tf.zeros_like(gaussian_heatMat))

        self.heatMat = self.pafMat = None

        # warm-up
        self.persistent_sess.run(
            tf.compat.v1.variables_initializer([
                v for v in tf.compat.v1.global_variables()
                if v.name.split(':')[0] in [
                    x.decode('utf-8') for x in self.persistent_sess.run(
                        tf.compat.v1.report_uninitialized_variables())
                ]
            ]))
        self.persistent_sess.run(
            [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up],
            feed_dict={
                self.tensor_image: [
                    np.ndarray(shape=(target_size[1], target_size[0], 3),
                               dtype=np.float32)
                ],
                self.upsample_size: [target_size[1], target_size[0]]
            })
        self.persistent_sess.run(
            [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up],
            feed_dict={
                self.tensor_image: [
                    np.ndarray(shape=(target_size[1], target_size[0], 3),
                               dtype=np.float32)
                ],
                self.upsample_size: [target_size[1] // 2, target_size[0] // 2]
            })
        self.persistent_sess.run(
            [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up],
            feed_dict={
                self.tensor_image: [
                    np.ndarray(shape=(target_size[1], target_size[0], 3),
                               dtype=np.float32)
                ],
                self.upsample_size: [target_size[1] // 4, target_size[0] // 4]
            })

        # logs
        if self.tensor_image.dtype == tf.quint8:
            logger.info('quantization mode enabled.')
Example #30
import os

import numpy as np
import tensorflow as tf
from tensorflow.core.framework import tensor_shape_pb2
from tensorflow.python.framework import graph_util
from tensorflow.python.compiler.tensorrt import trt_convert as trt

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

graph_def = tf.GraphDef()
img = np.random.rand(1, 3, 512, 512)

## open pb file for inference
with tf.gfile.GFile("./Joint_PER300_POST64.pb", 'rb') as f:
    graph_def.ParseFromString(f.read())
    converted_graph_def = trt.create_inference_graph(
        input_graph_def=graph_def,
        max_batch_size=1,
        is_dynamic_op=False,
        outputs=[
            'import/output/labels:0', 'import/output/boxes:0',
            "import/output/scores:0"
        ])
    output_node = tf.import_graph_def(converted_graph_def,
                                      return_elements=[
                                          'import/output/labels:0',
                                          'import/output/boxes:0',
                                          "import/output/scores:0"
                                      ])

with tf.Session() as sess:
    ## print tensorflow-tensorrt graph node name
    tensor_name_list = [
        tensor.name for tensor in tf.get_default_graph().as_graph_def().node
    ]
Example #31
                                        custom_objects=custom_objects)
    else:
        model = keras.models.load_model(args.input)

    model.summary()
    if args.appearance:
        trk_fea = Input(shape=(500, ))
        det_fea = Input(shape=(500, ))
        out = model.get_layer("lambda_1")([trk_fea, det_fea])
        out = model.get_layer("dense_4")(out)
        model = Model(inputs=[trk_fea, det_fea], outputs=out)

    outputnames = []
    for output in model.outputs:
        outputnames.append(output.name.split(':')[0])
    print(outputnames)
    keras_to_tensorflow(model, outputnames, args.output)

    if args.check:
        graph_def = tf.GraphDef()
        tensornames = []
        for name in outputnames:
            tensornames.append(f"{name}:0")

        with open(args.output, 'rb') as graph_file:
            graph_def.ParseFromString(graph_file.read())
        trt_model = trt.create_inference_graph(graph_def,
                                               tensornames,
                                               is_dynamic_op=True,
                                               precision_mode='fp16')
Example #32
    saver.restore(sess, "/home/vatsal/Downloads/attachments/models/model")
    your_outputs = ["output_tensor/Softmax"]
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess,
        tf.get_default_graph().as_graph_def(),# graph+weight from the session
        output_node_names=your_outputs)
    with gfile.FastGFile("/home/vatsal/Downloads/attachments/models/frozen_model.pb", 'wb') as f:
        f.write(frozen_graph.SerializeToString())
    print("Frozen model is successfully stored!")
# =============================================================================

# =============================================================================
# %% Optimize the frozen model to TensorRT graph
trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,# frozen model
    outputs=your_outputs,
    max_batch_size=2,# specify your max batch size
    max_workspace_size_bytes=2*(10**9),# specify the max workspace
    precision_mode="FP16")

with gfile.FastGFile("/home/vatsal/Downloads/attachments/models/TensorRT_model.pb", 'wb') as f:
    f.write(trt_graph.SerializeToString())
print("TensorRT model is successfully stored!")
# =============================================================================

# =============================================================================
# %% Count how many nodes/operations before and after optimization
# check how many ops of the original frozen model
all_nodes = len([1 for n in frozen_graph.node])
print("numb. of all_nodes in frozen graph:", all_nodes)

# check how many ops were converted to TensorRT engines
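The snippet breaks off after this comment; a minimal completion sketch, mirroring the node-counting pattern used in Example #12:

trt_engine_nodes = len([1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)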
Example #33
#%%
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
# import tensorflow.contrib.tensorrt as trt
# %%
frozen_graph  ='RUNS/model_495_v1_0_new.pb'

# output_names = ['conv2d_59','conv2d_67','conv2d_75']

# Read graph def (binary format)
with open(frozen_graph, 'rb') as f:
    frozen_graph_gd = tf.GraphDef()
    frozen_graph_gd.ParseFromString(f.read())


trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph_gd,  # Pass the parsed graph def here
    outputs=['Input/X','Binary_Seg/FULL_CONV_binary','Instance_seg/FULL_CONV_instance'],
    max_batch_size=1,
    # max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50
)

tf.io.write_graph(trt_graph, "frozen_models/",
                     "trt_model_101.pb", as_text=False)

tf.io.write_graph(trt_graph, "frozen_models/",
                     "trt_model_101.txt", as_text=True)

category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)

# First deserialize your frozen graph:
with tf.compat.v1.Session() as sess:
    with tf.compat.v2.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as f:
        frozen_graph = tf.compat.v1.GraphDef()
        frozen_graph.ParseFromString(f.read())
outputs = [
    'num_detections', 'detection_boxes', 'detection_scores',
    'detection_classes'
]
trt_graph = trt.create_inference_graph(input_graph_def=frozen_graph,
                                       outputs=outputs,
                                       max_batch_size=1,
                                       max_workspace_size_bytes=1 << 3,
                                       precision_mode="FP32",
                                       minimum_segment_size=5)


def run_inference_for_single_image(image):
    with tf.compat.v1.Graph().as_default() as g:
        # Get handles to input and output tensors
        inputs_ = g.get_tensor_by_name('input_images:0')
        outputs_ = [o + ':0' for o in outputs]

        tf.import_graph_def(trt_graph,
                            input_map={"input_images": inputs_},
                            return_elements=outputs,
                            name='')
        all_tensor_names = outputs_