def GetConversionParams(self, run_params):
    """Build the TrtConversionParams used by this test.

    The leading (batch) dimension of every input-shape set returned by
    `self._GetParamsCached().input_dims` is collected, after checking that all
    shapes inside one set agree on it.

    Args:
      run_params: run configuration; its `precision_mode`, `dynamic_engine`
        and `use_calibration` attributes are forwarded to the params.

    Returns:
      A TrtConversionParams whose `use_function_backup` field is set to the
      result of `IsQuantizationWithCalibration` applied to the params.
    """
    batch_sizes = []
    for shapes in self._GetParamsCached().input_dims:
        assert shapes
        # All shapes belonging to one set must share the same batch size.
        leading_dims = [shape[0] for shape in shapes]
        assert min(leading_dims) == max(leading_dims)
        batch_sizes.append(leading_dims[0])

    base_params = trt_convert.TrtConversionParams(
        rewriter_config_template=None,
        max_workspace_size_bytes=1 << 25,
        precision_mode=run_params.precision_mode,
        minimum_segment_size=2,
        is_dynamic_op=run_params.dynamic_engine,
        maximum_cached_engines=1,
        use_calibration=run_params.use_calibration,
        use_function_backup=False,
        # The smallest batch size is used on purpose: when several different
        # input shapes are provided, new engines are always created in the
        # cache, which lets the test exercise the cache behavior.
        max_batch_size=min(batch_sizes))

    # The backup flag is derived from the params themselves, so it is filled
    # in after the initial construction.
    needs_backup = IsQuantizationWithCalibration(base_params)
    return base_params._replace(use_function_backup=needs_backup)
def _GetFunc(self, use_trt, model_dir, use_dynamic_shape):
    """Build the callable that runs the mnist model.

    The model is first exported with `self._SaveModel` into a temporary
    directory, which is removed again when this method returns.

    Args:
      use_trt: whether to convert the graph with TF-TRT.
      model_dir: the model directory to load the checkpoints from.
      use_dynamic_shape: whether to run the TF-TRT conversion in dynamic shape
        mode.

    Returns:
      The mnist model function: the converter's converted function when
      `use_trt` is set, otherwise the default serving signature of the loaded
      SavedModel.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        export_dir = os.path.join(scratch_dir, 'mnist')
        self._SaveModel(model_dir, export_dir)

        if not use_trt:
            # Plain TensorFlow path: load the export and hand back its default
            # serving signature.
            loaded = saved_model_load(export_dir, tags=[tag_constants.SERVING])
            signatures = loaded.signatures
            return signatures[
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

        trt_params = trt_convert.TrtConversionParams(
            precision_mode='FP16',
            minimum_segment_size=2,
            max_workspace_size_bytes=1 << 28,
            maximum_cached_engines=1)
        trt_converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=export_dir,
            conversion_params=trt_params,
            use_dynamic_shape=use_dynamic_shape,
            dynamic_shape_profile_strategy='ImplicitBatchModeCompatible')
        trt_converter.convert()
        return trt_converter._converted_func
def export_serving_model(yolo, path, warmup_path=None, with_tensorrt=False):
    """Export `yolo.yolo_model` as a SavedModel with a serving warm-up record.

    The model is saved to `path` (after `overwrite_path(path)` has been
    called). With `with_tensorrt` set, the saved model is converted with
    TF-TRT and the converted model replaces the original at `path`. Finally
    a single PredictionLog built from the warm-up image is written to
    `<path>/assets.extra/tf_serving_warmup_requests`.

    Args:
      yolo: object exposing the model to export as `yolo.yolo_model`.
      path: destination directory of the SavedModel.
      warmup_path: path of the image used for the warm-up request. When None,
        the path is asked for interactively on stdin.
      with_tensorrt: whether to convert the saved model with TF-TRT.
    """
    overwrite_path(path)
    tf.saved_model.save(yolo.yolo_model, path)

    if with_tensorrt:
        params = trt.TrtConversionParams(
            rewriter_config_template=None,
            max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=trt.TrtPrecisionMode.FP16,
            minimum_segment_size=3,
            is_dynamic_op=True,
            maximum_cached_engines=1,
            use_calibration=True,
            max_batch_size=1)
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=path,
                                            conversion_params=params)
        converter.convert()
        # The converted model is written back to the same location, so the
        # unconverted export is removed first.
        tf.io.gfile.rmtree(path)
        converter.save(path)

    asset_extra = os.path.join(path, "assets.extra")
    tf.io.gfile.mkdir(asset_extra)
    warmup_record = os.path.join(asset_extra, "tf_serving_warmup_requests")
    with tf.io.TFRecordWriter(warmup_record) as writer:
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'detection'
        request.model_spec.signature_name = 'serving_default'

        if warmup_path is None:
            warmup_path = input('Please enter warm up image path:')
        # Read the raw image bytes inside a context manager so the file
        # handle is closed deterministically instead of being leaked.
        with open(warmup_path, 'rb') as image_file:
            image = image_file.read()

        # Add a leading axis so the request carries a batch of one image.
        image_data = np.expand_dims(image, 0)
        request.inputs['predict_image'].CopyFrom(
            tf.compat.v1.make_tensor_proto(image_data))

        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())
def convert_to_TFTRT(saved_model_path,
                     target_path,
                     max_workspace_size_bytes=1 << 22,
                     precision='FP16',
                     minimum_segment_size=3,
                     is_dynamic_op=True,
                     use_calibration=True,
                     max_batch_size=32,
                     calibration_input_fn=None):
    '''Convert a SavedModel with TF-TRT and save the result.

    Args:
        saved_model_path: directory of the SavedModel to convert.
        target_path: directory the converted SavedModel is written to.
        max_workspace_size_bytes: forwarded to TrtConversionParams.
        precision: 'FP32', 'FP16' or 'INT8'
        minimum_segment_size: forwarded to TrtConversionParams.
        is_dynamic_op: forwarded to TrtConversionParams.
        use_calibration: forwarded to TrtConversionParams.
        max_batch_size: forwarded to TrtConversionParams.
        calibration_input_fn: INT8 calibration is needed. A generator function
            that yields input data as a list or tuple, which will be used to
            execute the converted signature for calibration. All the returned
            input data should have the same shape. It is only handed to the
            converter when `precision` is INT8 and `use_calibration` is
            true; otherwise it is ignored.
            Source: https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html
    '''
    conversion_params = trt.TrtConversionParams(
        rewriter_config_template=None,
        maximum_cached_engines=1,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        use_calibration=use_calibration,
        max_batch_size=max_batch_size)
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=saved_model_path,
        conversion_params=conversion_params)

    # The calibration function used to be accepted but never reached the
    # converter, so INT8 calibration could not run. It is forwarded only when
    # calibration is actually requested, so non-INT8 callers that also pass a
    # function keep their previous behavior.
    needs_calibration = bool(use_calibration) and str(precision).upper() == 'INT8'
    converter.convert(
        calibration_input_fn=calibration_input_fn if needs_calibration else None)
    converter.save(target_path)
def testTrtGraphConverter_ShapeOp_v2(self):
    """Check TrtGraphConverterV2 on a model whose output is a tensor shape."""
    # Nothing to verify without TensorRT.
    # TODO(b/185944425): enable the test for TRT before TRT 7.
    if not is_tensorrt_enabled() or get_linked_tensorrt_version()[0] < 7:
        return

    class ShapeOpModel(tracking.AutoTrackable):

        def __init__(self):
            self.v = None

        @def_function.function(input_signature=[
            tensor_spec.TensorSpec(shape=[None, None], dtype=dtypes.float32)
        ])
        def run(self, x):
            shifted = x + 1
            return array_ops.identity(array_ops.shape(shifted), name="output")

    sample = np.random.random_sample([5, 3]).astype(np.float32)

    def _sample_batches():
        yield (sample,)

    # Export the reference model and record what it returns for the sample.
    reference_model = ShapeOpModel()
    reference_output = reference_model.run(sample)
    source_dir = self.mkdtemp()
    save.save(reference_model, source_dir, signatures=reference_model.run)

    # Convert the exported graph with TF-TRT in dynamic shape mode.
    trt_converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=source_dir,
        conversion_params=trt_convert.TrtConversionParams(
            minimum_segment_size=2),
        use_dynamic_shape=True)
    trt_converter.convert()
    # Build the graph with the input generator. This runs the TRTEngineOp
    # native segment.
    trt_converter.build(_sample_batches)

    converted_dir = self.mkdtemp()
    trt_converter.save(converted_dir)
    reloaded = load.load(converted_dir)
    serving_fn = reloaded.signatures["serving_default"]

    # The converted graph is expected to contain one TRTEngineOp.
    self._CheckTrtOps(serving_fn)

    # Run the converted graph and compare with the reference result.
    trt_result = serving_fn(x=ops.convert_to_tensor(sample))
    first_output = list(trt_result.values())[0]
    self.assertAllClose(reference_output, first_output)
def testTrtGraphConverter_ShapeOp_Int32InputOutput_v2(self):
    """Test ShapeOp with int32 values as engine input and output."""

    class ShapeOpModel(tracking.AutoTrackable):

        def __init__(self):
            self.v = None

        @def_function.function(input_signature=[
            tensor_spec.TensorSpec(shape=[None, None], dtype=dtypes.float32)
        ])
        def run(self, x):
            shifted = x + 1
            shape_of_shifted = array_ops.shape(shifted)
            # Insert an op that TF-TRT does not support so that two engines
            # are built: the first one produces an int32 output, the second
            # one has an int32 input and an int32 output.
            permuted = nn_ops.data_format_vec_permute(
                shape_of_shifted, src_format="NHWC", dst_format="NCHW")
            doubled = permuted * 2
            return array_ops.identity(doubled, name="output")

    sample = np.random.random_sample([5, 3]).astype(np.float32)

    def _sample_batches():
        yield (sample,)

    # Export the reference model and record what it returns for the sample.
    reference_model = ShapeOpModel()
    reference_output = reference_model.run(sample)
    source_dir = self.mkdtemp()
    save.save(reference_model, source_dir, signatures=reference_model.run)

    # Convert the exported graph with TF-TRT in dynamic shape mode.
    trt_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
    trt_converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=source_dir,
        use_dynamic_shape=True,
        **trt_params._asdict())
    trt_converter.convert()
    # Build the graph with the input generator. This runs the TRTEngineOp
    # native segment.
    trt_converter.build(_sample_batches)

    converted_dir = self.mkdtemp()
    trt_converter.save(converted_dir)
    reloaded = load.load(converted_dir)
    serving_fn = reloaded.signatures["serving_default"]

    # The converted graph is expected to contain two TRTEngineOps.
    self._CheckTrtOps(serving_fn, num_engines=2)

    # Run the converted graph and compare with the reference result.
    trt_result = serving_fn(x=ops.convert_to_tensor(sample))
    first_output = list(trt_result.values())[0]
    self.assertAllClose(reference_output, first_output)
def main(args):
    """Convert a SavedModel to TF-TRT in FP16, build it and save the result.

    Args:
      args: object providing `saved_model` (input SavedModel directory) and
        `saved_model_trt` (output directory for the converted model).
    """
    fp16_params = trt.TrtConversionParams(
        precision_mode="FP16",
        use_calibration=False)
    trt_converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=args.saved_model,
        conversion_params=fp16_params)

    trt_converter.convert()
    # The build step is fed with the inputs produced by
    # `representative_dataset_gen`.
    trt_converter.build(representative_dataset_gen)
    trt_converter.save(args.saved_model_trt)
def GetConversionParams(self, run_params):
    """Build the TrtConversionParams used by this test.

    Args:
      run_params: run configuration; its `precision_mode` and
        `use_calibration` attributes are forwarded to the params.

    Returns:
      A TrtConversionParams with a 32 MiB workspace (1 << 25 bytes), a minimum
      segment size of 2 and a single cached engine.
    """
    return trt_convert.TrtConversionParams(
        max_workspace_size_bytes=1 << 25,
        precision_mode=run_params.precision_mode,
        minimum_segment_size=2,
        maximum_cached_engines=1,
        use_calibration=run_params.use_calibration)
def export_model(model_dir, prec, tf_trt_model_dir=None):
    """Convert the Keras SavedModel for precision `prec` with TF-TRT.

    The model is read from `<model_dir>/saved_model_<prec>`, called once on
    an all-zero input, converted and saved.

    Args:
      model_dir: directory containing the `saved_model_<prec>` export.
      prec: precision tag; "fp32" selects FP32, any other value selects FP16.
      tf_trt_model_dir: output directory; when falsy,
        `/tmp/tf-trt_model_<prec>` is used.
    """
    use_fp32 = prec == "fp32"
    source_dir = os.path.join(model_dir, f'saved_model_{prec}')

    model = tf.keras.models.load_model(source_dir)

    # Run a single inference-mode forward pass on zeros before converting.
    # NOTE(review): presumably this validates/builds the loaded model; the
    # result itself is discarded.
    input_dtype = tf.float32 if use_fp32 else tf.float16
    dummy_input = tf.constant(tf.zeros([1, 572, 572, 1], dtype=input_dtype))
    _ = model(dummy_input, training=False)

    if use_fp32:
        trt_prec = trt.TrtPrecisionMode.FP32
    else:
        trt_prec = trt.TrtPrecisionMode.FP16

    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=source_dir,
        conversion_params=trt.TrtConversionParams(precision_mode=trt_prec),
    )
    converter.convert()

    if not tf_trt_model_dir:
        tf_trt_model_dir = f'/tmp/tf-trt_model_{prec}'
    converter.save(tf_trt_model_dir)
    print(f"TF-TRT model saved at {tf_trt_model_dir}")
def _GetFunc(self, use_trt, model_dir, use_dynamic_shape):
    """Build the callable that runs the mnist model.

    The model is first exported with `self._SaveModel` into a temporary
    directory, which is removed again when this method returns. On the TF-TRT
    path a detailed converter summary is printed after conversion.

    Args:
      use_trt: whether to convert the graph with TF-TRT.
      model_dir: the model directory to load the checkpoints from.
      use_dynamic_shape: whether to run the TF-TRT conversion in dynamic shape
        mode.

    Returns:
      The mnist model function: the converter's converted function when
      `use_trt` is set, otherwise the default serving signature of the loaded
      SavedModel.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        export_dir = os.path.join(scratch_dir, 'mnist')
        self._SaveModel(model_dir, export_dir)

        if not use_trt:
            # Plain TensorFlow path: load the export and hand back its default
            # serving signature.
            loaded = saved_model_load(export_dir, tags=[tag_constants.SERVING])
            signatures = loaded.signatures
            return signatures[
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

        trt_params = trt_convert.TrtConversionParams(
            precision_mode='FP16',
            minimum_segment_size=2,
            max_workspace_size_bytes=(
                trt_convert.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES),
            maximum_cached_engines=1)
        trt_converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=export_dir,
            use_dynamic_shape=use_dynamic_shape,
            dynamic_shape_profile_strategy='ImplicitBatchModeCompatible',
            **trt_params._asdict())
        trt_converter.convert()

        # Use the terminal width for the summary when it is wider than 160
        # columns; fall back to 160 when no terminal size is available.
        try:
            summary_width = max(160, os.get_terminal_size().columns)
        except OSError:
            summary_width = 160
        trt_converter.summary(line_length=summary_width, detailed=True)

        return trt_converter._converted_func
import imagenet train_ds = imagenet.load_ds( 2 * (args.input, )).take(192).map(lambda x, y: tf.cast(x, 'uint8')) train_ds = train_ds.batch(64) data_len = 50000 // 64 count = 0 print('Training:') for batch in train_ds: print('\r%d/%d' % (count, data_len), end='') count += 1 yield (batch, ) params = trt.TrtConversionParams(rewriter_config_template=None, max_workspace_size_bytes=1 << 30, precision_mode=args.mode, minimum_segment_size=3, is_dynamic_op=True, maximum_cached_engines=1, use_calibration=True, max_batch_size=64, allow_build_at_runtime=True) converter = trt.TrtGraphConverterV2(input_saved_model_dir=args.mod_path, conversion_params=params) calib_input = input_fn if args.mode == 'INT8' else None converter.convert(calib_input) if args.build: converter.build(input_fn=input_fn) converter.save(args.out_path)