def tf_to_trt_graph(graph, y_name, batch_size, precision):
    # New code in May 2019, not stable yet:
    # converter = trt.TrtGraphConverter(
    #     input_graph_def=graph.frozen, nodes_blacklist=graph.y_name,
    #     max_batch_size=batch_size, max_workspace_size_bytes=1 << 30,
    #     precision_mode=precision)
    # self.tftrt_graph = converter.convert()
    if precision == "INT8":
        calib_graph = trt.create_inference_graph(
            graph,
            outputs=y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 25,
            precision_mode=precision,
            minimum_segment_size=2)
        # The original referenced an undefined `calibGraph`; the variable
        # created above is `calib_graph`.
        tftrt_graph = trt.calib_graph_to_infer_graph(calib_graph)
    else:
        tftrt_graph = trt.create_inference_graph(
            graph,
            outputs=y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 25,
            precision_mode=precision,
            minimum_segment_size=2)
    return tftrt_graph
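In the contrib-era INT8 flow, calib_graph_to_infer_graph() only produces a working engine after the calibration graph has been executed on representative inputs in the same process. A minimal sketch of that intervening step, assuming hypothetical tensor names x_name/y_name and random stand-in data with an assumed 224x224x3 input shape:

import numpy as np
import tensorflow.compat.v1 as tf

def run_calibration(calib_graph, x_name, y_name, batch_size, num_batches=10):
    # Execute the calibration graph so the INT8 calibrator can observe
    # activation ranges before calib_graph_to_infer_graph() is called.
    with tf.Graph().as_default():
        tf.import_graph_def(calib_graph, name='')
        with tf.Session() as sess:
            x = sess.graph.get_tensor_by_name(x_name + ':0')
            y = sess.graph.get_tensor_by_name(y_name + ':0')
            for _ in range(num_batches):
                # Random data as a placeholder for a real calibration set.
                sess.run(y, feed_dict={
                    x: np.random.rand(batch_size, 224, 224, 3)})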
def convert(model_path, output_path, tf1, precision, max_workspace_size,
            min_segment_size, saved_model_tags, build, batch_shape):
    if not tf1:
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            max_workspace_size_bytes=max_workspace_size,
            precision_mode=precision,
            minimum_segment_size=min_segment_size)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            input_saved_model_tags=saved_model_tags,
            conversion_params=params)
        try:
            converter.convert()
        except Exception as e:
            raise RuntimeError('{}. Try passing "--tf1".'.format(e))
        if build or batch_shape[0]:

            def reference_data_gen():
                # tf.random.normal takes a shape (not size=) and already
                # returns float32 tensors, so no .astype() is needed.
                inp1 = tf.random.normal(batch_shape, dtype=tf.float32)
                inp2 = tf.random.normal(batch_shape, dtype=tf.float32)
                yield (inp1, inp2)

            converter.build(reference_data_gen)
        converter.save(output_saved_model_dir=output_path)
    else:
        trt.create_inference_graph(
            None,
            None,
            max_batch_size=1,
            max_workspace_size_bytes=max_workspace_size,
            precision_mode=precision,
            # The parameter defined above is min_segment_size.
            minimum_segment_size=min_segment_size,
            is_dynamic_op=True,
            input_saved_model_dir=model_path,
            input_saved_model_tags=saved_model_tags,
            output_saved_model_dir=output_path)
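A hypothetical invocation of convert() above for a TF2 SavedModel; the paths, tags, and batch shape are placeholders, not values from the snippet:

convert(
    model_path='/models/resnet_saved_model',   # hypothetical input dir
    output_path='/models/resnet_trt',          # hypothetical output dir
    tf1=False,
    precision='FP16',
    max_workspace_size=1 << 30,
    min_segment_size=3,
    saved_model_tags=['serve'],
    build=True,
    batch_shape=[8, 224, 224, 3])              # assumed input shape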
def create_inference_graph(
        input_graph_def,
        outputs,
        max_batch_size=1,
        max_workspace_size_bytes=trt_convert.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
        precision_mode=trt_convert.TrtPrecisionMode.FP32,
        minimum_segment_size=3,
        is_dynamic_op=False,
        maximum_cached_engines=1,
        cached_engine_batches=None,
        input_saved_model_dir=None,
        input_saved_model_tags=None,
        output_saved_model_dir=None,
        session_config=None):
    return trt_convert.create_inference_graph(
        input_graph_def=input_graph_def,
        outputs=outputs,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        cached_engine_batches=cached_engine_batches,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        output_saved_model_dir=output_saved_model_dir,
        session_config=session_config)
def _TestCreateInferenceGraph(self,
                              input_saved_model_dir=None,
                              output_saved_model_dir=None):
    """General method to test trt_convert.create_inference_graph()."""
    input_graph_def = None if input_saved_model_dir else self._GetGraphDef()
    output_graph_def = trt_convert.create_inference_graph(
        input_graph_def, ["output"],
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())
    graph_defs_to_verify = [output_graph_def]
    if output_saved_model_dir is not None:
        saved_model_graph_def = saved_model_utils.get_meta_graph_def(
            output_saved_model_dir, tag_constants.SERVING).graph_def
        self.assertTrue(isinstance(saved_model_graph_def, graph_pb2.GraphDef))
        graph_defs_to_verify.append(saved_model_graph_def)
    for graph_def in graph_defs_to_verify:
        node_name_to_op = {node.name: node.op for node in graph_def.node}
        self.assertEqual(
            {
                "input": "Placeholder",
                "TRTEngineOp_0": "TRTEngineOp",
                "output": "Identity"
            }, node_name_to_op)
def trt_cfe_test():
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session() as sess:
            # First deserialize your frozen graph:
            with tf.io.gfile.GFile(
                    "/home/vilon_tao/Projects/machine-learning/tf20/models/freezed_open_shelf_cfe_model.pb",
                    'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            # Now you can create a TensorRT inference graph from your
            # frozen graph:
            trt_graph = trt.create_inference_graph(
                input_graph_def=graph_def,
                outputs=['loc_conf/loc_conf_concat/concat'],
                max_batch_size=20,
                # NOTE: 2 << 10 bytes (~2 KB) is an unusually small TensorRT
                # workspace; typical values are tens to hundreds of MB.
                max_workspace_size_bytes=2 << 10,
                precision_mode="FP32")
            tf.import_graph_def(trt_graph, name='')
            # tf.import_graph_def(graph_def, name='')
            tf_input = sess.graph.get_tensor_by_name('input_1:0')
            tf_output = sess.graph.get_tensor_by_name(
                'loc_conf/loc_conf_concat/concat:0')
            start_time = time.time()
            for i in range(100):
                predictions = sess.run(
                    tf_output,
                    feed_dict={
                        tf_input:
                            np.random.rand(8, 512, 512, 3).astype(np.float32)
                    })
            print('TRT inference with 8 * 512 * 512 * 3 cost: {} ms.'.format(
                1000 * (time.time() - start_time) / 100))
def create_inference_graph(input_graph_def,
                           outputs,
                           max_batch_size=1,
                           max_workspace_size_bytes=2 << 20,
                           precision_mode=trt_convert.TrtPrecisionMode.FP32,
                           minimum_segment_size=3,
                           is_dynamic_op=False,
                           maximum_cached_engines=1,
                           cached_engine_batches=None,
                           use_calibration=True,
                           input_saved_model_dir=None,
                           input_saved_model_tags=None,
                           output_saved_model_dir=None,
                           session_config=None):
    return trt_convert.create_inference_graph(
        input_graph_def=input_graph_def,
        outputs=outputs,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        cached_engine_batches=cached_engine_batches,
        use_calibration=use_calibration,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        output_saved_model_dir=output_saved_model_dir,
        session_config=session_config)
def frozen_graph_trt(input_frozen_graph_path, output_dir, max_batch_size,
                     precision_mode, is_dynamic_op):
    '''create a TensorRT inference graph from a Frozen Graph'''
    output_node_names = [
        BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME
    ]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_frozen_graph_path = os.path.join(output_dir, 'trt_frozen_graph.pb')
    with tf.io.gfile.GFile(input_frozen_graph_path, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())
    trt_graph = trt.create_inference_graph(
        input_graph_def=graph_def,
        outputs=output_node_names,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
        precision_mode=precision_mode,
        # Pass the caller's flag through instead of hard-coding False.
        is_dynamic_op=is_dynamic_op)
    with open(output_frozen_graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
def GetINT8(graph, graph_def, nodes_list):
    with graph.as_default():
        trt_graph = trt.create_inference_graph(
            graph_def,
            nodes_list,
            precision_mode='INT8',
            max_workspace_size_bytes=1 << 30,
            max_batch_size=32)
    return trt_graph
def convert_with_tensorrt(args):
    """Function triggered by 'convert tensorrt' command.

    Args:
      args: A namespace parsed from command line.
    """
    # Import here instead of at top, because this will crash if TensorRT is
    # not installed.
    from tensorflow.python.compiler.tensorrt import trt_convert  # pylint: disable=g-import-not-at-top
    trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=args.max_batch_size,
        max_workspace_size_bytes=args.max_workspace_size_bytes,
        precision_mode=args.precision_mode,
        minimum_segment_size=args.minimum_segment_size,
        is_dynamic_op=args.is_dynamic_op,
        input_saved_model_dir=args.dir,
        input_saved_model_tags=args.tag_set.split(','),
        output_saved_model_dir=args.output_dir)
def trt_classfication_test():
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            # First deserialize your frozen graph:
            with tf.io.gfile.GFile("./models/trt_model/classfication.pb", 'rb') as f:
                # with tf.io.gfile.GFile("/home/vilon_tao/Projects/machine-learning/tf20/models/freezed_open_shelf_resenet_model.pb", 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            # Count how many ops are in the frozen model.
            print('before trt:>>>>>>>>>>>>>>>>>')
            for n in graph_def.node:
                print(n.op)
            trt_engine_ops = len(
                [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
            print("numb. of trt_engine_ops in frozen_graph:", trt_engine_ops)
            all_ops = len([1 for n in graph_def.node])
            print("numb. of all_ops in frozen_graph:", all_ops)

            # Now you can create a TensorRT inference graph from your
            # frozen graph:
            trt_graph = trt.create_inference_graph(
                input_graph_def=graph_def,
                outputs=['embeddings/fc_512/BiasAdd'],
                max_batch_size=20,
                # is_dynamic_op=True,
                # maximum_cached_engines=20,
                # cached_engine_batches=[20] * 20,
                # NOTE: 2 << 11 bytes (~4 KB) is an unusually small workspace.
                max_workspace_size_bytes=2 << 11,
                precision_mode="FP32")

            # Count how many ops are in the TRT graph.
            print('after trt:>>>>>>>>>>>>>>>>>')
            for n in trt_graph.node:
                print(n.op)
            trt_engine_ops = len(
                [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
            print("numb. of trt_engine_ops in trt_graph:", trt_engine_ops)
            all_ops = len([1 for n in trt_graph.node])
            print("numb. of all_ops in trt_graph:", all_ops)

            # graph_io.write_graph(trt_graph, './models/trt_model/', 'classfication.pb', as_text=False)

            # tf.import_graph_def(graph_def, name='')
            tf.import_graph_def(trt_graph, name='')
            tf_input = sess.graph.get_tensor_by_name('input_1:0')
            tf_output = sess.graph.get_tensor_by_name('embeddings/fc_512/BiasAdd:0')
            start_time = time.time()
            for i in range(100):
                embeddings = sess.run(
                    tf_output,
                    feed_dict={tf_input: np.random.rand(8, 96, 96, 3).astype(np.float32)})
            print('TRT inference with 8 * 96 * 96 * 3 cost: {} ms.'.format(
                1000 * (time.time() - start_time) / 100))
def export(saved_model_dir,
           tensorrt_model_dir,
           max_batch_size=1,
           max_workspace_size_bytes=2 << 20,
           precision_mode='FP16',
           minimum_segment_size=3,
           is_dynamic_op=False,
           maximum_cached_engines=1):
    """Exports TensorRT model."""
    trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        input_saved_model_dir=saved_model_dir,
        input_saved_model_tags=None,
        input_saved_model_signature_key=None,
        output_saved_model_dir=tensorrt_model_dir)
def convert_with_tensorrt(args):
    """Function triggered by 'convert tensorrt' command.

    Args:
      args: A namespace parsed from command line.
    """
    # Import here instead of at top, because this will crash if TensorRT is
    # not installed.
    from tensorflow.python.compiler.tensorrt import trt_convert as trt  # pylint: disable=g-import-not-at-top
    if not args.convert_tf1_model:
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            max_workspace_size_bytes=args.max_workspace_size_bytes,
            precision_mode=args.precision_mode,
            minimum_segment_size=args.minimum_segment_size)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=args.dir,
            input_saved_model_tags=args.tag_set.split(','),
            conversion_params=params)
        try:
            converter.convert()
        except Exception as e:
            raise RuntimeError(
                '{}. Try passing "--convert_tf1_model=True".'.format(e))
        converter.save(output_saved_model_dir=args.output_dir)
    else:
        trt.create_inference_graph(
            None,
            None,
            max_batch_size=1,
            max_workspace_size_bytes=args.max_workspace_size_bytes,
            precision_mode=args.precision_mode,
            minimum_segment_size=args.minimum_segment_size,
            is_dynamic_op=True,
            input_saved_model_dir=args.dir,
            input_saved_model_tags=args.tag_set.split(','),
            output_saved_model_dir=args.output_dir)
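For reference, a hypothetical way to drive convert_with_tensorrt() above directly, outside its usual command-line entry point, using an argparse.Namespace whose field values are placeholders:

import argparse

args = argparse.Namespace(
    dir='/models/my_saved_model',        # hypothetical input SavedModel dir
    output_dir='/models/my_trt_model',   # hypothetical output dir
    tag_set='serve',
    max_workspace_size_bytes=1 << 30,
    precision_mode='FP16',
    minimum_segment_size=3,
    convert_tf1_model=False)
convert_with_tensorrt(args)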
def export(saved_model_dir,
           tensorrt_model_dir,
           max_batch_size=1,
           max_workspace_size_bytes=2 << 20,
           precision_mode='FP16',
           minimum_segment_size=3,
           is_dynamic_op=False,
           maximum_cached_engines=1):
    """Exports TensorRT model."""
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision_mode,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        maximum_cached_engines=maximum_cached_engines,
        input_saved_model_dir=saved_model_dir,
        input_saved_model_tags=None,
        input_saved_model_signature_key=None,
        output_saved_model_dir=tensorrt_model_dir,
        session_config=config)
def convert_graphdef(model_file, output_layer, output_file):
    graph = tf.Graph()
    graph_def = tf.compat.v1.GraphDef()
    with open(model_file, "rb") as f:
        graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def)
        trt_graph = trt.create_inference_graph(
            input_graph_def=graph_def,
            outputs=[output_layer],
            precision_mode='FP16')
        tf.io.write_graph(trt_graph, '/tmp/', output_file, as_text=False)
    return trt_graph
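Once convert_graphdef() has written the optimized GraphDef, it can be loaded back for inference. A minimal sketch, with the file path, tensor names, and input shape all assumed for illustration:

import numpy as np
import tensorflow.compat.v1 as tf

trt_graph_def = tf.GraphDef()
with tf.io.gfile.GFile('/tmp/trt_model.pb', 'rb') as f:  # hypothetical file
    trt_graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(trt_graph_def, name='')
    with tf.Session(graph=g) as sess:
        # 'input:0' and 'output_layer:0' are placeholder tensor names.
        result = sess.run(
            'output_layer:0',
            feed_dict={'input:0': np.zeros((1, 224, 224, 3), np.float32)})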
def testCreateInferenceGraph_MinimumSegmentSize(self):
    if not trt_convert.is_tensorrt_enabled():
        return
    output_graph_def = trt_convert.create_inference_graph(
        self._GetGraphDef(), ["output"],
        minimum_segment_size=5,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=False)
    node_name_to_op = {node.name: node.op for node in output_graph_def.node}
    self.assertEqual(
        {
            "v1/read": "Const",
            "input": "Placeholder",
            "add": "Add",
            "mul": "Mul",
            "add_1": "Add",
            "output": "Identity"
        }, node_name_to_op)
def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
    """Get the frozen mnist GraphDef.

    Args:
      use_trt: whether to use TF-TRT to convert the graph.
      max_batch_size: the max batch size to apply during TF-TRT conversion.
      model_dir: the model directory to load the checkpoints.

    Returns:
      The frozen mnist GraphDef.
    """
    graph = ops.Graph()
    with self.session(graph=graph) as sess:
        with graph.device('/GPU:0'):
            x = array_ops.placeholder(
                shape=(None, 28, 28, 1), dtype=dtypes.float32,
                name=INPUT_NODE_NAME)
            self._BuildGraph(x)
        # Load weights.
        mnist_saver = saver.Saver()
        checkpoint_file = latest_checkpoint(model_dir)
        mnist_saver.restore(sess, checkpoint_file)
        # Freeze.
        graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
    # Convert with TF-TRT.
    if use_trt:
        logging.info('Number of nodes before TF-TRT conversion: %d',
                     len(graph_def.node))
        graph_def = trt_convert.create_inference_graph(
            graph_def,
            outputs=[OUTPUT_NODE_NAME],
            max_batch_size=max_batch_size,
            precision_mode='INT8',
            # There is a 2GB GPU memory limit for each test, so we set
            # max_workspace_size_bytes to 256MB to leave enough room for TF
            # runtime to allocate GPU memory.
            max_workspace_size_bytes=1 << 28,
            minimum_segment_size=2,
            use_calibration=False,
        )
        logging.info('Number of nodes after TF-TRT conversion: %d',
                     len(graph_def.node))
        num_engines = len(
            [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
        self.assertEqual(1, num_engines)
    return graph_def
def _GetTrtGraphDef(self, run_params, graph_state, gdef):
    """Return trt converted graphdef."""
    params = self._GetParamsCached()
    conversion_params = self.GetConversionParams(run_params)
    logging.info(conversion_params)
    config_for_trt = self._GetConfigProto(run_params, graph_state)
    return trt_convert.create_inference_graph(
        input_graph_def=gdef,
        outputs=params.input_names + params.output_names,
        max_batch_size=conversion_params.max_batch_size,
        max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
        precision_mode=conversion_params.precision_mode,
        minimum_segment_size=conversion_params.minimum_segment_size,
        is_dynamic_op=conversion_params.is_dynamic_op,
        maximum_cached_engines=conversion_params.maximum_cached_engines,
        cached_engine_batches=conversion_params.cached_engine_batches,
        use_calibration=conversion_params.use_calibration,
        session_config=config_for_trt)
def testCreateInferenceGraph_DynamicOp(self):
    if not trt_convert.is_tensorrt_enabled():
        return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=True,
        maximum_cached_engines=2,
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
        importer.import_graph_def(output_graph_def, name="")
        with self.test_session(config=self._GetConfigProto()) as sess:
            # Run with batch size 1, a new engine is created and cached.
            self._TestRun(sess, 1, True)
            # Run with batch size 2, a new engine is created and cached.
            self._TestRun(sess, 2, True)
            # Run with batch size 3, since the number of cached engines has
            # reached the max, it should evict an old engine and create a
            # new one.
            self._TestRun(sess, 3, True)

    # Test the output SavedModel.
    with ops.Graph().as_default():
        with self.test_session(config=self._GetConfigProto()) as sess:
            loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
            # Run with batch size 1, a new engine is created and cached.
            self._TestRun(sess, 1, True)
            # Run with batch size 2, a new engine is created and cached.
            self._TestRun(sess, 2, True)
            # Run with batch size 3, since the number of cached engines has
            # reached the max, it should evict an old engine and create a
            # new one.
            self._TestRun(sess, 3, True)
def testCreateInferenceGraph_StaticOp(self):
    if not trt_convert.is_tensorrt_enabled():
        return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=1,
        max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
        is_dynamic_op=False,
        maximum_cached_engines=2,  # This is a no-op, added just for testing.
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
        importer.import_graph_def(output_graph_def, name="")
        with self.test_session(config=self._GetConfigProto()) as sess:
            # Run with batch size 1, the default engine embedded in the
            # graphdef will be used.
            self._TestRun(sess, 1, True)
            # Run with batch size 2, which exceeds the max_batch_size; it
            # should fall back to the TF function.
            self._TestRun(sess, 2, False)

    # Test the output SavedModel.
    with ops.Graph().as_default():
        with self.test_session(config=self._GetConfigProto()) as sess:
            loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
            # Run with batch size 1, the default engine embedded in the
            # graphdef will be used.
            self._TestRun(sess, 1, True)
            # Run with batch size 2, which exceeds the max_batch_size; it
            # should fall back to the TF function.
            self._TestRun(sess, 2, False)
def saved_model_trt(input_saved_model_dir, output_dir, max_batch_size,
                    precision_mode, is_dynamic_op):
    '''create a TensorRT inference graph from a SavedModel'''
    output_frozen_graph_path = os.path.join(output_dir, 'trt_frozen_graph.pb')
    trt_graph = trt.create_inference_graph(
        input_graph_def=None,
        outputs=None,
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=['serve'],
        max_batch_size=max_batch_size,
        max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
        precision_mode=precision_mode,
        output_saved_model_dir=output_dir,
        # Pass the caller's flag through instead of hard-coding False.
        is_dynamic_op=is_dynamic_op)
    with open(output_frozen_graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches
import os

import numpy as np
import tensorflow as tf
# `trt` and `os` were not imported in the original snippet; these imports
# are assumptions consistent with the other examples in this collection.
from tensorflow.python.compiler.tensorrt import trt_convert as trt
import time
# from tf_trt_models.tf_trt_models.detection import download_detection_model, build_detection_graph

IMAGE_PATH = 'data/warriors.jpg'
FROZEN_GRAPH_NAME = 'data/frozen_inference_graph_face.pb'
output_dir = ''

frozen_graph = tf.GraphDef()
with open(os.path.join(output_dir, FROZEN_GRAPH_NAME), 'rb') as f:
    frozen_graph.ParseFromString(f.read())

INPUT_NAME = 'image_tensor'
BOXES_NAME = 'detection_boxes'
CLASSES_NAME = 'detection_classes'
SCORES_NAME = 'detection_scores'
MASKS_NAME = 'detection_masks'
NUM_DETECTIONS_NAME = 'num_detections'

input_names = [INPUT_NAME]
output_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME]

trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,
    outputs=output_names,
    max_batch_size=1,
    max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50)
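The snippet above converts the detection graph but stops there. A plausible follow-up, mirroring the serialization step used in the other examples in this collection (the output filename is an assumption):

# Hypothetical output path; serialize the TRT-optimized GraphDef for reuse.
with open(os.path.join(output_dir, 'trt_frozen_inference_graph_face.pb'), 'wb') as f:
    f.write(trt_graph.SerializeToString())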
# Read graph def (binary format)
with open(frozen_graph, 'rb') as f:
    frozen_graph_gd = tf.GraphDef()
    frozen_graph_gd.ParseFromString(f.read())

#%%
# If the frozen graph is in text format, load it like this instead:
# import google.protobuf.text_format
# with open(frozen_graph, 'r') as f:
#     frozen_graph_gd = google.protobuf.text_format.Parse(f.read(), tf.GraphDef())

trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph_gd,  # Pass the parsed graph def here
    outputs=['Binary_Seg/logits_to_softmax'],
    max_batch_size=1,
    # max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50)

tf.io.write_graph(trt_graph, "frozen_models/", "trt_model_101.pb", as_text=False)
tf.io.write_graph(trt_graph, "frozen_models/", "trt_model_101.txt", as_text=True)
# %%
def __init__(self, graph_path, target_size=(320, 240), tf_config=None,
             trt_bool=False):
    self.target_size = target_size

    # load graph
    logger.info('loading graph from %s(default size=%dx%d)' %
                (graph_path, target_size[0], target_size[1]))
    with tf.io.gfile.GFile(graph_path, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    if trt_bool is True:
        output_nodes = ["Openpose/concat_stage7"]
        graph_def = trt.create_inference_graph(
            graph_def,
            output_nodes,
            max_batch_size=1,
            max_workspace_size_bytes=1 << 20,
            precision_mode="FP16",
            # precision_mode="INT8",
            minimum_segment_size=3,
            is_dynamic_op=True,
            maximum_cached_engines=int(1e3),
            use_calibration=True,
        )

    self.graph = tf.compat.v1.get_default_graph()
    tf.import_graph_def(graph_def, name='TfPoseEstimator')
    self.persistent_sess = tf.compat.v1.Session(graph=self.graph,
                                                config=tf_config)

    for ts in [n.name for n in tf.compat.v1.get_default_graph().as_graph_def().node]:
        print(ts)

    self.tensor_image = self.graph.get_tensor_by_name('TfPoseEstimator/image:0')
    self.tensor_output = self.graph.get_tensor_by_name(
        'TfPoseEstimator/Openpose/concat_stage7:0')
    self.tensor_heatMat = self.tensor_output[:, :, :, :19]
    self.tensor_pafMat = self.tensor_output[:, :, :, 19:]
    self.upsample_size = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2,),
                                                  name='upsample_size')
    self.tensor_heatMat_up = tf.compat.v1.image.resize(
        self.tensor_output[:, :, :, :19],
        self.upsample_size,
        align_corners=False,
        name='upsample_heatmat')
    self.tensor_pafMat_up = tf.compat.v1.image.resize(
        self.tensor_output[:, :, :, 19:],
        self.upsample_size,
        align_corners=False,
        name='upsample_pafmat')
    if trt_bool is True:
        smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0, 19)
    else:
        smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                      window_shape=(3, 3),
                                      pooling_type='MAX',
                                      padding='SAME')
    self.tensor_peaks = tf.where(
        tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat,
        tf.zeros_like(gaussian_heatMat))

    self.heatMat = self.pafMat = None

    # warm-up: initialize any uninitialized variables, then run inference
    # once at full, half, and quarter upsample sizes.
    self.persistent_sess.run(
        tf.compat.v1.variables_initializer([
            v for v in tf.compat.v1.global_variables()
            if v.name.split(':')[0] in [
                x.decode('utf-8') for x in self.persistent_sess.run(
                    tf.compat.v1.report_uninitialized_variables())
            ]
        ]))
    warmup_image = [
        np.ndarray(shape=(target_size[1], target_size[0], 3), dtype=np.float32)
    ]
    for div in (1, 2, 4):
        self.persistent_sess.run(
            [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up],
            feed_dict={
                self.tensor_image: warmup_image,
                self.upsample_size: [target_size[1] // div,
                                     target_size[0] // div]
            })

    # logs
    if self.tensor_image.dtype == tf.quint8:
        logger.info('quantization mode enabled.')
from tensorflow.core.framework import tensor_shape_pb2
from tensorflow.python.framework import graph_util
import os

# The original snippet uses tf, np and trt without importing them; these
# imports are assumptions consistent with the other examples in this
# collection.
import numpy as np
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

graph_def = tf.GraphDef()
img = np.random.rand(1, 3, 512, 512)

## open pb file for inference
with tf.gfile.GFile("./Joint_PER300_POST64.pb", 'rb') as f:
    graph_def.ParseFromString(f.read())

converted_graph_def = trt.create_inference_graph(
    input_graph_def=graph_def,
    max_batch_size=1,
    is_dynamic_op=False,
    outputs=[
        'import/output/labels:0', 'import/output/boxes:0',
        'import/output/scores:0'
    ])
output_node = tf.import_graph_def(
    converted_graph_def,
    return_elements=[
        'import/output/labels:0', 'import/output/boxes:0',
        'import/output/scores:0'
    ])

with tf.Session() as sess:
    ## print tensorflow-tensorrt graph node names
    tensor_name_list = [
        tensor.name for tensor in tf.get_default_graph().as_graph_def().node
    ]
# The snippet begins mid-branch; the condition below is reconstructed from
# context (loading a Keras model with or without custom objects).
if custom_objects:
    model = keras.models.load_model(args.input,
                                    custom_objects=custom_objects)
else:
    model = keras.models.load_model(args.input)
model.summary()

if args.appearance:
    trk_fea = Input(shape=(500,))
    det_fea = Input(shape=(500,))
    out = model.get_layer("lambda_1")([trk_fea, det_fea])
    out = model.get_layer("dense_4")(out)
    model = Model(inputs=[trk_fea, det_fea], outputs=out)

outputnames = []
for output in model.outputs:
    outputnames.append(output.name.split(':')[0])
print(outputnames)
keras_to_tensorflow(model, outputnames, args.output)

if args.check:
    graph_def = tf.GraphDef()
    tensornames = []
    for name in outputnames:
        tensornames.append(f"{name}:0")
    with open(args.output, 'rb') as graph_file:
        graph_def.ParseFromString(graph_file.read())
    trt_model = trt.create_inference_graph(
        graph_def,
        tensornames,
        is_dynamic_op=True,
        # TF-TRT documents precision modes in upper case ("FP16").
        precision_mode='FP16')
saver.restore(sess, "/home/vatsal/Downloads/attachments/models/model")
your_outputs = ["output_tensor/Softmax"]

# Freeze the graph: convert variables to constants (graph + weights from
# the session).
frozen_graph = tf.graph_util.convert_variables_to_constants(
    sess,
    tf.get_default_graph().as_graph_def(),
    output_node_names=your_outputs)
with gfile.FastGFile("/home/vatsal/Downloads/attachments/models/frozen_model.pb", 'wb') as f:
    f.write(frozen_graph.SerializeToString())
print("Frozen model is successfully stored!")

# =============================================================================
# =============================================================================
# %% Optimize the frozen model to a TensorRT graph
trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,  # frozen model
    outputs=your_outputs,
    max_batch_size=2,  # specify your max batch size
    max_workspace_size_bytes=2 * (10**9),  # specify the max workspace
    precision_mode="FP16")

with gfile.FastGFile("/home/vatsal/Downloads/attachments/models/TensorRT_model.pb", 'wb') as f:
    f.write(trt_graph.SerializeToString())
print("TensorRT model is successfully stored!")

# =============================================================================
# =============================================================================
# %% Count how many nodes/operations before and after optimization
# Check how many ops are in the original frozen model.
all_nodes = len([1 for n in frozen_graph.node])
print("numb. of all_nodes in frozen graph:", all_nodes)
# Check how many ops were converted to TensorRT engines:
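The snippet is cut off at that last comment; the usual continuation, mirroring the node-counting pattern used in the other examples in this collection, would be:

trt_engine_nodes = len(
    [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)
all_nodes_trt = len([1 for n in trt_graph.node])
print("numb. of all_nodes in TensorRT graph:", all_nodes_trt)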
#%%
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
# import tensorflow.contrib.tensorrt as trt

# %%
frozen_graph = 'RUNS/model_495_v1_0_new.pb'
# output_names = ['conv2d_59', 'conv2d_67', 'conv2d_75']

# Read graph def (binary format)
with open(frozen_graph, 'rb') as f:
    frozen_graph_gd = tf.GraphDef()
    frozen_graph_gd.ParseFromString(f.read())

trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph_gd,  # Pass the parsed graph def here
    outputs=[
        'Input/X', 'Binary_Seg/FULL_CONV_binary',
        'Instance_seg/FULL_CONV_instance'
    ],
    max_batch_size=1,
    # max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50)

tf.io.write_graph(trt_graph, "frozen_models/", "trt_model_101.pb", as_text=False)
tf.io.write_graph(trt_graph, "frozen_models/", "trt_model_101.txt", as_text=True)
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)

# First deserialize your frozen graph:
with tf.compat.v1.Session() as sess:
    with tf.compat.v2.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as f:
        frozen_graph = tf.compat.v1.GraphDef()
        frozen_graph.ParseFromString(f.read())
    outputs = [
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes'
    ]
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=outputs,
        max_batch_size=1,
        # NOTE: 1 << 3 is only 8 bytes of workspace, an unusually small value.
        max_workspace_size_bytes=1 << 3,
        precision_mode="FP32",
        minimum_segment_size=5)


def run_inference_for_single_image(image):
    with tf.compat.v1.Graph().as_default() as g:
        # Get handles to input and output tensors.
        inputs_ = g.get_tensor_by_name('input_images:0')
        outputs_ = [o + ':0' for o in outputs]
        # The original passed the undefined name `inputs` to input_map;
        # the tensor fetched above is `inputs_`.
        tf.import_graph_def(trt_graph,
                            input_map={"input_images": inputs_},
                            return_elements=outputs,
                            name='')
        all_tensor_names = outputs_