def testEval(self):
  if not is_tensorrt_enabled():
    return
  model_dir = test.test_src_dir_path(
      'python/compiler/tensorrt/test/testdata/mnist')

  accuracy_tf_native = self._Run(
      is_training=False,
      use_trt=False,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_native: %f', accuracy_tf_native)
  self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

  if get_linked_tensorrt_version()[0] < 5:
    return

  accuracy_tf_trt = self._Run(
      is_training=False,
      use_trt=True,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
  self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
def testTrtGraphConverter_AllowEngineNativeSegmentExecution(self):
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  def _InputFn():
    yield np_input1, np_input2

  # Run TRT conversion and request an unreasonably large workspace.
  converter = self._CreateConverterV2(
      input_saved_model_dir, max_workspace_size_bytes=10 << 40)
  converter.convert()

  os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
  with self.assertRaisesRegex(
      errors.AbortedError,
      r"User disallowed engine native segment execution"):
    converter.build(input_fn=_InputFn)
  os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"
  converter.build(input_fn=_InputFn)
def testTrtGraphConverter_OnlineConversion(self, device):
  """Test case for TF-TRT conversion using Grappler directly."""
  if not is_tensorrt_enabled():
    return

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      precision_mode=trt_convert.TrtPrecisionMode.FP32, is_dynamic_op=True)
  config = self._GetConfigProto(
      rewriter_config=trt_convert.get_tensorrt_rewriter_config(
          conversion_params, is_v2=False))

  with ops.Graph().as_default():
    # Online conversion requires a frozen graph, so we reuse inp1 as the var
    # argument.
    inp1 = array_ops.placeholder(
        dtype=dtypes.float32, shape=[None, 1, 1], name="input1")
    inp2 = array_ops.placeholder(
        dtype=dtypes.float32, shape=[None, 1, 1], name="input2")
    if device:
      with ops.device(device):
        TrtConvertTest._GetGraph(inp1, inp2, inp1)
    else:
      TrtConvertTest._GetGraph(inp1, inp2, inp1)
    with self.session(config=config) as sess:
      self._TestRun(sess, batch_size=1)
def testTrtGraphConverter_StaticOp(self):
  if not is_tensorrt_enabled():
    return

  output_saved_model_dir = self.mkdtemp()
  output_graph_def = self._ConvertGraphV1(
      output_saved_model_dir=output_saved_model_dir,
      maximum_cached_engines=1)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(sess, 2)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(sess, 2)
def testTrtGraphConverter_DynamicOp(self):
  if not is_tensorrt_enabled():
    return

  output_saved_model_dir = self.mkdtemp()
  output_graph_def = self._ConvertGraphV1(
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True,
      maximum_cached_engines=2)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)
def setUp(self):
  """Setup method."""
  super(TfTrtIntegrationTestBase, self).setUp()
  warnings.simplefilter("always")

  if not is_tensorrt_enabled():
    self.skipTest("Test requires TensorRT")
def testEval(self):
  if not is_tensorrt_enabled():
    return
  # TODO(b/162447069): Enable the test for TRT 7.1.3.
  if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3):
    return
  model_dir = test.test_src_dir_path(
      'python/compiler/tensorrt/test/testdata/mnist')

  accuracy_tf_native = self._Run(
      is_training=False,
      use_trt=False,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_native: %f', accuracy_tf_native)
  self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

  if not trt_test.IsTensorRTVersionGreaterEqual(5):
    return

  accuracy_tf_trt = self._Run(
      is_training=False,
      use_trt=True,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
  self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
def testBuildInfo(self):
  self.assertEqual(build_info.build_info['is_rocm_build'],
                   test.is_built_with_rocm())
  self.assertEqual(build_info.build_info['is_cuda_build'],
                   test.is_built_with_cuda())
  self.assertEqual(build_info.build_info['is_tensorrt_build'],
                   is_tensorrt_enabled())
def testTrtGraphConverter_AllowBuildAtRuntime(self, build_offline,
                                              allow_build_at_runtime):
  if not is_tensorrt_enabled():
    return

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  np_input1 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
  np_input2 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))

  def _InputFn():
    yield np_input1, np_input2

  # Run TRT conversion.
  converter = self._CreateConverterV2(
      input_saved_model_dir, allow_build_at_runtime=allow_build_at_runtime)
  converter.convert()
  if build_offline:
    converter.build(input_fn=_InputFn)

  # Save the converted model.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)

  saved_model_loaded = load.load(
      output_saved_model_dir, tags=[tag_constants.SERVING])
  graph_func = saved_model_loaded.signatures[_SAVED_MODEL_SIGNATURE_KEY]

  # Check that the TRTEngineOp(s) have the correct attribute(s).
  def _CheckFn(node):
    self.assertEqual(node.attr["_allow_build_at_runtime"].b,
                     allow_build_at_runtime)

  self._CheckTrtOps(graph_func, _CheckFn)

  # If the engine was not built offline and the user disallowed both building
  # at runtime and running the native segments, an error is reported.
  if not build_offline and not allow_build_at_runtime:
    with self.assertRaisesRegex(
        errors.AbortedError,
        r"User disallowed engine native segment execution"):
      try:
        os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
        graph_func(inp1=np_input1, inp2=np_input2)
      finally:
        os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"
  else:
    output = graph_func(inp1=np_input1, inp2=np_input2)["output_0"]
    self.assertEqual(output.shape, (4, 1, 1))
    self.assertAllClose(
        np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)
def testGetTensorrtRewriterConfigTemplate(self):
  """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
  if not is_tensorrt_enabled():
    return

  rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
  rewriter_config_with_trt.optimizers.extend(
      ["constfold", "layout", "constfold"])
  rewriter_config_with_trt.meta_optimizer_iterations = (
      rewriter_config_pb2.RewriterConfig.ONE)
  optimizer = rewriter_config_with_trt.custom_optimizers.add()
  rewriter_config_with_trt.custom_optimizers.add().name = "constfold"
  optimizer.name = "TensorRTOptimizer"
  optimizer.parameter_map["minimum_segment_size"].i = 10
  optimizer.parameter_map["max_batch_size"].i = 128
  optimizer.parameter_map["is_dynamic_op"].b = True
  optimizer.parameter_map["max_workspace_size_bytes"].i = 1234
  optimizer.parameter_map["precision_mode"].s = trt_convert._to_bytes(
      trt_convert.TrtPrecisionMode.INT8)
  optimizer.parameter_map["maximum_cached_engines"].i = 2
  optimizer.parameter_map["use_calibration"].b = False
  optimizer.parameter_map["use_implicit_batch"].b = True

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      rewriter_config_template=rewriter_config_with_trt)
  rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
      conversion_params=conversion_params)

  self.assertEqual(["constfold", "layout", "constfold"],
                   rewriter_cfg.optimizers)
  self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                   rewriter_cfg.meta_optimizer_iterations)

  trt_optimizer = None
  for optimizer in rewriter_cfg.custom_optimizers:
    if optimizer.name == "TensorRTOptimizer":
      self.assertIsNone(trt_optimizer)
      trt_optimizer = optimizer
  self.assertIsNotNone(trt_optimizer)
  for key in [
      "minimum_segment_size", "max_batch_size", "is_dynamic_op",
      "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines"
  ]:
    self.assertIn(key, trt_optimizer.parameter_map)
  self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
  self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
  self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
  self.assertEqual(1234,
                   trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
  self.assertEqual(trt_convert._to_bytes("INT8"),
                   trt_optimizer.parameter_map["precision_mode"].s)
  self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
  self.assertEqual(False, trt_optimizer.parameter_map["use_calibration"].b)
  self.assertEqual(True, trt_optimizer.parameter_map["use_implicit_batch"].b)
def testTrtGraphConverter_ShapeOp_v2(self):
  """Test case for TrtGraphConverterV2 with ShapeOp."""
  if not is_tensorrt_enabled():
    return

  # TODO(b/185944425): enable the test for TRT before TRT 7.
  ver = get_linked_tensorrt_version()
  if ver[0] < 7:
    return

  class ShapeOpModel(tracking.AutoTrackable):

    def __init__(self):
      self.v = None

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, None], dtype=dtypes.float32)
    ])
    def run(self, x):
      q = x + 1
      q_shape = array_ops.shape(q)
      return array_ops.identity(q_shape, name="output")

  np_input = np.random.random_sample([5, 3]).astype(np.float32)

  def _InputFunc():
    yield (np_input,)

  # Create the SavedModel.
  root = ShapeOpModel()
  expected_output = root.run(np_input)
  input_saved_model_dir = self.mkdtemp()
  save.save(root, input_saved_model_dir, signatures=root.run)

  # Convert the graph to TF-TRT.
  conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
  converter = trt_convert.TrtGraphConverterV2(
      input_saved_model_dir=input_saved_model_dir,
      conversion_params=conv_params,
      use_dynamic_shape=True)
  converter.convert()

  # Build the graph with the input generator. This runs the TRTEngineOp
  # native segment.
  converter.build(_InputFunc)

  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)

  root_with_trt = load.load(output_saved_model_dir)
  converted_signature = root_with_trt.signatures["serving_default"]
  # Check that the graph is converted to one TRTEngineOp.
  self._CheckTrtOps(converted_signature)
  # Run the graph.
  output_with_trt = converted_signature(x=ops.convert_to_tensor(np_input))
  # Check the result of the run.
  self.assertAllClose(expected_output, list(output_with_trt.values())[0])
def testRetainSignatureInfo_OneOutputSignatureKey(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[])
    def run(self):
      return {"my_output": array_ops.constant(1.0)}

  self._CompareSavedModel(_Model)
def testTrtGraphConverter_DestroyEngineCache(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir)
  converter.convert()

  trt_engine_name = self._GetUniqueTRTEngineOp(
      converter._converted_graph_def).name

  def _InputFn():
    yield np_input1, np_input2

  converter.build(input_fn=_InputFn)  # Populate the TRT engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)

  def _DestroyCache():
    with ops.device("GPU:0"):
      handle = gen_trt_ops.create_trt_resource_handle(
          resource_name=trt_engine_name)
      gen_resource_variable_ops.destroy_resource_op(
          handle, ignore_lookup_error=False)

  with self.assertRaisesRegex(errors.NotFoundError,
                              r"Resource .* does not exist."):
    _DestroyCache()

  # Load the converted model and make sure the engine cache is populated by
  # default.
  root = load.load(output_saved_model_dir)
  _DestroyCache()
  with self.assertRaisesRegex(errors.NotFoundError,
                              r"Resource .* does not exist."):
    _DestroyCache()

  # Load the converted model again and make sure the engine cache is
  # destroyed when the model goes out of scope.
  root = load.load(output_saved_model_dir)
  del root
  gc.collect()  # Force GC to destroy the TRT engine cache.
  with self.assertRaisesRegex(errors.NotFoundError,
                              r"Resource .* does not exist."):
    _DestroyCache()
def testRetainSignatureInfo_OneInput(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
    ])
    def run(self, inp):
      return inp + inp * inp

  self._CompareSavedModel(_Model)
def testBuildInfo(self):
  self.assertEqual(build_info.build_info['is_rocm_build'],
                   test.is_built_with_rocm())
  self.assertEqual(build_info.build_info['is_cuda_build'],
                   test.is_built_with_cuda())

  # TODO(b/173044576): make the test work for Windows.
  if platform.system() != 'Windows':
    # pylint: disable=g-import-not-at-top
    from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled
    self.assertEqual(build_info.build_info['is_tensorrt_build'],
                     is_tensorrt_enabled())
def testTrtGraphConverter_OfflineConversion(self, device):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  for need_calibration in [False, True]:
    # Use GraphDef as input.
    self._TestTrtGraphConverter(device)
    # Use SavedModel as input.
    self._TestTrtGraphConverter(
        device,
        output_saved_model_dir=self.mkdtemp(),
        need_calibration=need_calibration)
def testTrtGraphConverter_StaticConversionNotSupportedInV2(self):
  """Test case for trt_convert.TrtGraphConverter() using static mode."""
  if not is_tensorrt_enabled():
    return

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  with self.assertRaisesRegex(
      ValueError, r"Option is_dynamic_op=False is not supported in TF 2.0, "
      "please set it to True instead."):
    self._CreateConverterV2(input_saved_model_dir, is_dynamic_op=False)
def testTrtGraphConverter_MinimumSegmentSize(self):
  if not is_tensorrt_enabled():
    return
  output_graph_def = self._ConvertGraphV1(minimum_segment_size=7)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual(
      {
          "add/ReadVariableOp": "Const",
          "input1": "Placeholder",
          "input2": "Placeholder",
          "add": "AddV2",
          "mul": "Mul",
          "add_1": "AddV2",
          "add_2": "AddV2",
          "output": "Identity"
      }, node_name_to_op)
def testRetainSignatureInfo_TwoOutputSignatureKeys(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
    ])
    def run(self, inp):
      # Here the keys are not ordered lexicographically on purpose.
      return {
          "output_b": array_ops.constant(1.0),
          "output_a": inp + inp * inp
      }

  self._CompareSavedModel(_Model)
def testBackwardCompatibility(self):
  """Load and execute a model that was saved in TF2.0."""
  if not is_tensorrt_enabled():
    return

  model_dir = test.test_src_dir_path(
      "python/compiler/tensorrt/test/testdata/tftrt_2.0_saved_model")
  saved_model_loaded = load.load(model_dir, tags=[tag_constants.SERVING])
  graph_func = saved_model_loaded.signatures[
      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

  np_input1 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
  np_input2 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
  output = graph_func(input1=np_input1, input2=np_input2)["output_0"]

  self.assertEqual(output.shape, (4, 1, 1))
  self.assertAllClose(
      np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)
def testGetTensorrtRewriterConfig(self):
  """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
  if not is_tensorrt_enabled():
    return

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      max_batch_size=128,
      max_workspace_size_bytes=1234,
      precision_mode="INT8",
      minimum_segment_size=10,
      is_dynamic_op=True,
      maximum_cached_engines=2)
  rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
      conversion_params=conversion_params)

  self.assertEqual(["constfold", "layout", "constfold"],
                   rewriter_cfg.optimizers)
  self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                   rewriter_cfg.meta_optimizer_iterations)

  trt_optimizer = None
  for optimizer in rewriter_cfg.custom_optimizers:
    if optimizer.name == "TensorRTOptimizer":
      self.assertIsNone(trt_optimizer)
      trt_optimizer = optimizer
  self.assertIsNotNone(trt_optimizer)
  for key in [
      "minimum_segment_size", "max_batch_size", "is_dynamic_op",
      "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines"
  ]:
    self.assertIn(key, trt_optimizer.parameter_map)
  self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
  self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
  self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
  self.assertEqual(1234,
                   trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
  self.assertEqual(trt_convert._to_bytes("INT8"),
                   trt_optimizer.parameter_map["precision_mode"].s)
  self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
def testTrtGraphConverter_AllowEngineNativeSegmentExecution(self):
  if not is_tensorrt_enabled():
    return

  # This test no longer works with TRT >= 8: TensorRT does not preallocate
  # max_workspace_size_bytes anymore, but instead allocates as needed, up to
  # this value.
  # TODO: update the unittest to make this TRTEngine creation fail with TRT8.
  ver = get_linked_tensorrt_version()
  if ver[0] >= 8:
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  def _InputFn():
    yield np_input1, np_input2

  # Run TRT conversion and request an unreasonably large workspace.
  converter = self._CreateConverterV2(
      input_saved_model_dir, max_workspace_size_bytes=10 << 40)
  converter.convert()

  os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
  with self.assertRaisesRegex(
      errors.AbortedError,
      r"User disallowed engine native segment execution"):
    try:
      converter.build(input_fn=_InputFn)
    finally:
      # Always reset the environment variable.
      os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"
  converter.build(input_fn=_InputFn)
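# The env-var toggling above is easy to get wrong: if the build unexpectedly
# succeeds, TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION="False" can leak into
# later tests. A minimal sketch of a reusable guard, assuming only
# standard-library behavior; the helper name is hypothetical and not part of
# the test suite:
import contextlib
import os


@contextlib.contextmanager
def _env_var_set(name, value):
  """Temporarily sets os.environ[name] to value, restoring it on exit."""
  old = os.environ.get(name)
  os.environ[name] = value
  try:
    yield
  finally:
    # Restore the previous state even if the body raises.
    if old is None:
      del os.environ[name]
    else:
      os.environ[name] = old

# Usage sketch: the variable is restored no matter how the block exits.
#
#   with _env_var_set("TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION", "False"):
#     with self.assertRaisesRegex(errors.AbortedError, ...):
#       converter.build(input_fn=_InputFn)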
  for (precision_mode, convert_online, dynamic_engine,
       use_calibration) in opts:
    conversion = "OnlineConversion" if convert_online else "OfflineConversion"
    engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
    calibration_type = "UseCalibration" if use_calibration else "NoCalibration"
    test_name = "%s_%s_%s_%s_%s" % ("testTfTrtV2" if is_v2 else "testTfTrt",
                                    conversion, engine_type, precision_mode,
                                    calibration_type)
    run_params = RunParams(
        convert_online=convert_online,
        precision_mode=precision_mode,
        dynamic_engine=dynamic_engine,
        test_name=test_name,
        use_calibration=use_calibration,
        is_v2=is_v2)
    if is_v2:
      setattr(test_class, test_name,
              test_util.run_v2_only(_GetTest(run_params)))
    else:
      setattr(test_class, test_name,
              test_util.run_v1_only("", _GetTest(run_params)))


def _AddTests(test_class):
  """Adds test methods to TfTrtIntegrationTestBase."""
  _AddTestsFor(test_class, is_v2=False)
  _AddTestsFor(test_class, is_v2=True)


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)
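# For reference, the naming scheme above expands each option tuple into a
# generated test method name. The option values below are illustrative, not
# the actual `opts` list:
#
#   ("FP16", False, True, False)
#     -> "testTfTrtV2_OfflineConversion_DynamicEngine_FP16_NoCalibration"
#   ("INT8", True, False, True)
#     -> "testTfTrt_OnlineConversion_StaticEngine_INT8_UseCalibration"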
#       use_trt=False,
#       batch_size=128,
#       num_epochs=100,
#       model_dir=model_dir)


def testEval(self):
  model_dir = test.test_src_dir_path(
      'python/compiler/tensorrt/test/testdata/mnist')

  accuracy_tf_native = self._Run(
      is_training=False,
      use_trt=False,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_native: %f', accuracy_tf_native)
  self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

  accuracy_tf_trt = self._Run(
      is_training=False,
      use_trt=True,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
  self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)


if __name__ == '__main__' and is_tensorrt_enabled():
  test.main()
def testTrtGraphConverter_DynamicConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  expected_output = root.run(np_input1, np_input2)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir)
  converter.convert()

  # Verify the converted GraphDef and ConcreteFunction.
  self._CheckTrtOps(converter._converted_func)  # pylint: disable=protected-access

  trt_engine_name = self._GetUniqueTRTEngineOp(
      converter._converted_graph_def).name

  # Save the converted model without any TRT engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  unexpected_asset_file = os.path.join(
      output_saved_model_dir,
      "assets/trt-serialized-engine." + trt_engine_name)
  self.assertFalse(os.path.exists(unexpected_asset_file))

  # Run the converted function to populate the engine cache.
  def _InputFn():
    yield np_input1, np_input2

  converter.build(input_fn=_InputFn)

  # Save the converted model again with serialized engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir,
      "assets/trt-serialized-engine." + trt_engine_name)
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))

  del converter
  gc.collect()  # Force GC to destroy the TRT engine cache.

  # Load and verify the converted model.
  #
  # TODO(laigd): the name of the new input_signature of the
  # `root_with_trt.run` function is empty string (originally was None),
  # investigate why.
  root_with_trt = load.load(output_saved_model_dir)
  # TODO(laigd): `root_with_trt.run` is still using the original graph
  # without trt. Consider changing that.
  # self._CheckTrtOps(root_with_trt.run.get_concrete_function())
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  self._CheckTrtOps(converted_signature)
  output_with_trt = converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with V1 SavedModel signature mechanism.
  self.assertAllClose(
      expected_output,
      list(output_with_trt.values())[0],
      atol=1e-6,
      rtol=1e-6)

  del root_with_trt
  gc.collect()  # Force GC to destroy the TRT engine cache.
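# The negative and positive asset checks above (and the ones in the tests that
# follow) all key off the same on-disk layout. A minimal helper sketch
# capturing that layout; the function name is hypothetical, but the path
# scheme is exactly the one the assertions above exercise:
import os


def _engine_asset_path(saved_model_dir, trt_engine_name):
  """Returns where converter.save() serializes the named engine cache."""
  return os.path.join(saved_model_dir,
                      "assets/trt-serialized-engine." + trt_engine_name)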
  self._CheckTrtOps(converter._converted_func)

  trt_engine_name = self._GetUniqueTRTEngineOp(
      converter._converted_graph_def).name

  # Save the converted model with or without any TRT engine cache, based on
  # the value of save_engine_flag.
  output_saved_model_dir = self.mkdtemp()
  converter.save(
      output_saved_model_dir, save_gpu_specific_engines=save_engine_flag)

  expected_asset_file = os.path.join(
      output_saved_model_dir,
      "assets/trt-serialized-engine." + trt_engine_name)
  self.assertTrue(os.path.exists(expected_asset_file))
  if save_engine_flag:
    # The engine is saved, so we expect engine data.
    self.assertTrue(os.path.getsize(expected_asset_file))
  else:
    # The engine is not saved, so the file should be empty.
    self.assertFalse(os.path.getsize(expected_asset_file))
  del converter
  gc.collect()  # Force GC to destroy the TRT engine cache.


if __name__ == "__main__" and is_tensorrt_enabled():
  test.main()
def testTrtGraphConverter_Int8Conversion_v2(self):
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  root = self._GetModelForV2()
  expected_output = root.run(np_input1, np_input2)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(
      input_saved_model_dir,
      precision_mode=trt_convert.TrtPrecisionMode.INT8,
      maximum_cached_engines=3)

  # Convert and perform INT8 calibration.
  def _CalibrationInputFn():
    yield np_input1, np_input2

  converter.convert(calibration_input_fn=_CalibrationInputFn)

  trt_engine_name = self._GetUniqueTRTEngineOp(
      converter._converted_graph_def).name

  def _CheckFn(node):
    self.assertTrue(len(node.attr["calibration_data"].s), node.name)

  # Verify the converted GraphDef.
  self._CheckTrtOps(converter._converted_func, _CheckFn)  # pylint: disable=protected-access

  # Build another engine with different batch size.
  def _InputFn():
    yield self._RandomInput([5, 1, 1])

  converter.build(input_fn=_InputFn)

  # Save the converted model.
  # TODO(laigd): check that it should contain two engines.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir,
      "assets/trt-serialized-engine." + trt_engine_name)
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))
  del converter
  gc.collect()  # Force GC to destroy the TRT engine cache.

  # Load and verify the converted model.
  root_with_trt = load.load(output_saved_model_dir)
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  self._CheckTrtOps(converted_signature, _CheckFn)
  output_with_trt = converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))
  self.assertEqual(1, len(output_with_trt))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with V1 SavedModel signature mechanism.
  self.assertAllClose(
      expected_output,
      list(output_with_trt.values())[0],
      atol=1e-6,
      rtol=1e-6)

  # Run with an input of different batch size. It should build a new engine
  # using the calibration table.
  # TODO(laigd): check that it should contain three engines.
  np_input1, np_input2 = self._RandomInput([6, 1, 1])
  converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))

  del root_with_trt
  gc.collect()  # Force GC to destroy the TRT engine cache.