def testTrtGraphConverter_OnlineConversion(self, device):
  """Test case for TF-TRT conversion using Grappler directly."""
  if not is_tensorrt_enabled():
    return

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      precision_mode=trt_convert.TrtPrecisionMode.FP32, is_dynamic_op=True)
  config = self._GetConfigProto(
      rewriter_config=trt_convert.get_tensorrt_rewriter_config(
          conversion_params, is_v2=False))

  with ops.Graph().as_default():
    # Online conversion requires a frozen graph, so we reuse inp1 as the var
    # argument.
    inp1 = array_ops.placeholder(
        dtype=dtypes.float32, shape=[None, 1, 1], name="input1")
    inp2 = array_ops.placeholder(
        dtype=dtypes.float32, shape=[None, 1, 1], name="input2")
    if device:
      with ops.device(device):
        TrtConvertTest._GetGraph(inp1, inp2, inp1)
    else:
      TrtConvertTest._GetGraph(inp1, inp2, inp1)
    with self.session(config=config) as sess:
      self._TestRun(sess, batch_size=1)
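# The helper below is an illustrative sketch, not part of the original test
# suite. It shows how the rewriter config exercised above is assumed to reach
# Grappler in user code: by embedding it in the session ConfigProto, which is
# what the `_GetConfigProto` test helper presumably wraps. The function name
# is hypothetical; the proto fields are the public ones from config_pb2.
def _ExampleOnlineConversionConfig():
  from tensorflow.core.protobuf import config_pb2  # pylint: disable=g-import-not-at-top

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      precision_mode=trt_convert.TrtPrecisionMode.FP32, is_dynamic_op=True)
  rewriter_config = trt_convert.get_tensorrt_rewriter_config(
      conversion_params, is_v2=False)
  # Passing the resulting ConfigProto to tf.Session(config=...) makes Grappler
  # run the TensorRTOptimizer and replace eligible segments with TRTEngineOp
  # nodes when the session initializes the graph.
  return config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(rewrite_options=rewriter_config))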
def _TestStaticOp(self):
  if not is_tensorrt_enabled():
    return

  input_saved_model_dir = self.mkdtemp()
  output_saved_model_dir = self.mkdtemp()
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      maximum_cached_engines=2)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(sess, 2, expect_engine_is_run=False)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(sess, 2, expect_engine_is_run=False)
def testEval(self):
  if not is_tensorrt_enabled():
    return

  model_dir = test.test_src_dir_path('python/compiler/tensorrt/test/testdata')

  accuracy_tf_native = self._Run(
      is_training=False,
      use_trt=False,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_native: %f', accuracy_tf_native)
  self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

  if get_linked_tensorrt_version()[0] < 5:
    return

  accuracy_tf_trt = self._Run(
      is_training=False,
      use_trt=True,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
  self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
def testTrtGraphConverter_StaticConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter() using static mode."""
  if not is_tensorrt_enabled():
    return

  np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  expected_output = root.run(np_input)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir, max_batch_size=4)
  converted_func = converter.convert()

  def _CheckTrtOps(graph_def):
    trt_op_names = [
        node.name for node in graph_def.node if node.op == "TRTEngineOp"
    ]
    for func in graph_def.library.function:
      for node in func.node_def:
        if node.op == "TRTEngineOp":
          trt_op_names.append(node.name)
          # In static mode the engine is built at conversion time and
          # embedded in the op.
          self.assertTrue(len(node.attr["serialized_segment"].s), node.name)
    self.assertEqual(1, len(trt_op_names))
    self.assertIn("TRTEngineOp_0", trt_op_names[0])

  # Verify the converted GraphDef and ConcreteFunction.
  self.assertIsInstance(converted_func, def_function.Function)
  converted_concrete_func = converted_func.get_concrete_function(
      tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32))
  _CheckTrtOps(converted_concrete_func.graph.as_graph_def())

  # Save the converted model with the statically-built engine inlined.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  unexpected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertFalse(os.path.exists(unexpected_asset_file))

  # Load and verify the converted model.
  root_with_trt = load.load(output_saved_model_dir)
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  _CheckTrtOps(converted_signature.graph.as_graph_def())
  output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with the V1 SavedModel signature mechanism.
  output_with_trt = list(output_with_trt.values())[0]
  self.assertAllClose(expected_output, output_with_trt, atol=1e-6, rtol=1e-6)
def testTrtGraphConverter_DynamicOp(self):
  if not is_tensorrt_enabled():
    return

  output_saved_model_dir = self.mkdtemp()
  output_graph_def = self._ConvertGraphV1(
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True,
      maximum_cached_engines=2)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3; since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3; since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)
def testGetTensorrtRewriterConfigTemplate(self):
  """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
  if not is_tensorrt_enabled():
    return

  rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
  rewriter_config_with_trt.optimizers.extend(
      ["constfold", "layout", "constfold"])
  rewriter_config_with_trt.meta_optimizer_iterations = (
      rewriter_config_pb2.RewriterConfig.ONE)
  optimizer = rewriter_config_with_trt.custom_optimizers.add()
  rewriter_config_with_trt.custom_optimizers.add().name = "constfold"
  optimizer.name = "TensorRTOptimizer"
  optimizer.parameter_map["minimum_segment_size"].i = 10
  optimizer.parameter_map["max_batch_size"].i = 128
  optimizer.parameter_map["is_dynamic_op"].b = True
  optimizer.parameter_map["max_workspace_size_bytes"].i = 1234
  optimizer.parameter_map["precision_mode"].s = trt_convert._to_bytes(
      trt_convert.TrtPrecisionMode.INT8)
  optimizer.parameter_map["maximum_cached_engines"].i = 2
  optimizer.parameter_map["use_calibration"].b = False
  optimizer.parameter_map["use_implicit_batch"].b = True

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      rewriter_config_template=rewriter_config_with_trt)
  rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
      conversion_params=conversion_params)

  self.assertEqual(["constfold", "layout", "constfold"],
                   rewriter_cfg.optimizers)
  self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                   rewriter_cfg.meta_optimizer_iterations)
  trt_optimizer = None
  for optimizer in rewriter_cfg.custom_optimizers:
    if optimizer.name == "TensorRTOptimizer":
      self.assertIsNone(trt_optimizer)
      trt_optimizer = optimizer
  self.assertIsNotNone(trt_optimizer)
  for key in [
      "minimum_segment_size", "max_batch_size", "is_dynamic_op",
      "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines"
  ]:
    self.assertIn(key, trt_optimizer.parameter_map)
  self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
  self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
  self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
  self.assertEqual(1234,
                   trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
  self.assertEqual(trt_convert._to_bytes("INT8"),
                   trt_optimizer.parameter_map["precision_mode"].s)
  self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
  self.assertEqual(False, trt_optimizer.parameter_map["use_calibration"].b)
  self.assertEqual(True, trt_optimizer.parameter_map["use_implicit_batch"].b)
def testRetainSignatureInfo_OneOutputSignatureKey(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[])
    def run(self):
      return {"my_output": array_ops.constant(1.0)}

  self._CompareSavedModel(_Model)
def testRetainSignatureInfo_OneInput(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
    ])
    def run(self, inp):
      return inp + inp * inp

  self._CompareSavedModel(_Model)
def testTrtGraphConverter_OfflineConversion(self, device):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  for need_calibration in [False, True]:
    # Use GraphDef as input.
    self._TestTrtGraphConverter(device)
    # Use SavedModel as input.
    self._TestTrtGraphConverter(
        device,
        output_saved_model_dir=self.mkdtemp(),
        need_calibration=need_calibration)
def testTrtGraphConverter_MinimumSegmentSize(self):
  if not is_tensorrt_enabled():
    return

  output_graph_def = self._ConvertGraph(minimum_segment_size=5)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual(
      {
          "v1/read": "Const",
          "input": "Placeholder",
          "add": "Add",
          "mul": "Mul",
          "add_1": "Add",
          "output": "Identity"
      }, node_name_to_op)
def _TestStaticOp(self, use_function_backup):
  if not is_tensorrt_enabled():
    return

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      maximum_cached_engines=2,  # This is a no-op, added just for testing.
      use_function_backup=use_function_backup)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(
          sess,
          1,
          use_function_backup=use_function_backup,
          expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(
          sess,
          2,
          use_function_backup=use_function_backup,
          expect_engine_is_run=False)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1; the default engine embedded in the graphdef
      # will be used.
      self._TestRun(
          sess,
          1,
          use_function_backup=use_function_backup,
          expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to the TF function.
      self._TestRun(
          sess,
          2,
          use_function_backup=use_function_backup,
          expect_engine_is_run=False)
def testTrtGraphConverter_DestroyEngineCache(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir)
  converter.convert()

  def _InputFn():
    yield np_input1, np_input2

  converter.build(input_fn=_InputFn)  # Populate the TRT engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)

  def _DestroyCache():
    with ops.device("GPU:0"):
      handle = gen_trt_ops.create_trt_resource_handle(
          resource_name="TRTEngineOp_0")
      gen_resource_variable_ops.destroy_resource_op(
          handle, ignore_lookup_error=False)

  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _DestroyCache()

  # Load the converted model and make sure the engine cache is populated by
  # default.
  root = load.load(output_saved_model_dir)
  _DestroyCache()
  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _DestroyCache()

  # Load the converted model again and make sure the engine cache is
  # destroyed when the model goes out of scope.
  root = load.load(output_saved_model_dir)
  del root
  gc.collect()  # Force GC to destroy the TRT engine cache.
  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _DestroyCache()
def testTrtGraphConverter_MinimumSegmentSize(self):
  if not is_tensorrt_enabled():
    return

  output_graph_def = self._ConvertGraph(minimum_segment_size=5)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual(
      {
          "add/ReadVariableOp": "Const",
          "input": "Placeholder",
          "add": "Add",
          "mul": "Mul",
          "add_1": "Add",
          "output": "Identity"
      }, node_name_to_op)
def testTrtGraphConverter_Int8Conversion_v2(self):
  if not is_tensorrt_enabled():
    return

  np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

  # Create a model and save it.
  input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  root = self._GetModelForV2()
  expected_output = root.run(np_input)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(
      input_saved_model_dir, precision_mode=trt_convert.TrtPrecisionMode.INT8)
  converted_func = converter.convert()

  # Run the converted function for INT8 calibration.
  calibration_output = converted_func(np_input)
  self.assertEqual(1, len(calibration_output))
  self.assertAllClose(
      expected_output,
      list(calibration_output.values())[0],
      atol=1e-6,
      rtol=1e-6)

  # Save the converted model again with serialized engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))

  # Load and verify the converted model.
  root_with_trt = load.load(output_saved_model_dir)
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
  self.assertEqual(1, len(output_with_trt))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with the V1 SavedModel signature mechanism.
  self.assertAllClose(
      expected_output,
      list(output_with_trt.values())[0],
      atol=1e-6,
      rtol=1e-6)
def testTrtGraphConverter_DestroyEngineCache(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir)
  converted_func = converter.convert()
  converted_func(np_input)  # Populate the TRT engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)

  def _destroy_cache():
    with ops.device("GPU:0"):
      handle = gen_trt_ops.create_trt_engine_cache_handle(
          container=trt_convert._TRT_ENGINE_CACHE_CONTAINER_NAME,
          resource_name="TRTEngineOp_0")
      gen_resource_variable_ops.destroy_resource_op(
          handle, ignore_lookup_error=False)

  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _destroy_cache()

  # Load the converted model and make sure the engine cache is populated by
  # default.
  root = load.load(output_saved_model_dir)
  _destroy_cache()
  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _destroy_cache()

  # Load the converted model again and make sure the engine cache is
  # destroyed when the model goes out of scope.
  root = load.load(output_saved_model_dir)
  del root
  gc.collect()  # Force GC to destroy the TRT engine cache.
  with self.assertRaisesRegexp(errors.NotFoundError,
                               r"Resource .* does not exist."):
    _destroy_cache()
def testRetainSignatureInfo_TwoOutputSignatureKeys(self):
  if not is_tensorrt_enabled():
    return

  class _Model(tracking.AutoTrackable):

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
    ])
    def run(self, inp):
      # Here the keys are not ordered lexicographically on purpose.
      return {
          "output_b": array_ops.constant(1.0),
          "output_a": inp + inp * inp
      }

  self._CompareSavedModel(_Model)
def load_trt_ops():
  """Loads the TF-TRT op libraries if they haven't been loaded already."""
  global _tf_trt_so

  if not is_tensorrt_enabled():
    return

  if platform.system() == "Windows":
    raise RuntimeError("Windows platforms are not supported")

  with _module_lock:
    if _tf_trt_so:
      return

    try:
      # pylint: disable=g-import-not-at-top,unused-variable
      # This will call register_op_list() in
      # tensorflow/python/framework/op_def_registry.py, but it doesn't register
      # the op or the op kernel in the C++ runtime.
      from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import trt_engine_op
      # pylint: enable=g-import-not-at-top,unused-variable
    except ImportError as e:
      print("**** Failed to import TF-TRT ops. This is because the binary was "
            "not built with CUDA or TensorRT enabled. ****")
      raise e

    try:
      # pylint: disable=g-import-not-at-top
      from tensorflow.python.framework import load_library
      from tensorflow.python.platform import resource_loader
      # pylint: enable=g-import-not-at-top

      # Loading the shared object will cause registration of the op and the op
      # kernel if we link TF-TRT dynamically.
      _tf_trt_so = load_library.load_op_library(
          resource_loader.get_path_to_datafile("libtftrt.so"))
    except errors.NotFoundError as e:
      no_trt_message = (
          "**** Failed to initialize TensorRT. This is either because the "
          "TensorRT installation path is not in LD_LIBRARY_PATH, or because "
          "you do not have it installed. If not installed, please go to "
          "https://developer.nvidia.com/tensorrt to download and install "
          "TensorRT ****")
      print(no_trt_message)
      raise e
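# The helper below is an illustrative sketch, not part of the original module.
# It shows the assumed call pattern for load_trt_ops(): invoke it once before
# loading a TRT-converted SavedModel so that TRTEngineOp and its kernel are
# registered with the runtime. The function name and the directory argument
# are hypothetical placeholders.
def _example_load_converted_model(saved_model_dir):
  load_trt_ops()  # No-op if the library has already been loaded.
  # Deferred import so CPU-only builds can still import this module.
  from tensorflow.python.saved_model import load as saved_model_load  # pylint: disable=g-import-not-at-top
  return saved_model_load.load(saved_model_dir)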
def testTrtGraphConverter_StaticConversionNotSupportedInV2(self):
  """Test case for trt_convert.TrtGraphConverter() using static mode."""
  if not is_tensorrt_enabled():
    return

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  with self.assertRaisesRegexp(
      ValueError, r"Option is_dynamic_op=False is not supported in TF 2.0, "
      "please set it to True instead."):
    self._CreateConverterV2(input_saved_model_dir, is_dynamic_op=False)
def testTrtGraphConverter_BasicConversion(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  input_saved_model_dir = self.mkdtemp()
  self._WriteInputSavedModel(input_saved_model_dir)

  for need_calibration in [False, True]:
    # Use GraphDef as input.
    self._TestTrtGraphConverter()
    # Use SavedModel as input.
    self._TestTrtGraphConverter(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=self.mkdtemp(),
        need_calibration=need_calibration)
def testGetTensorrtRewriterConfig(self):
  """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
  if not is_tensorrt_enabled():
    return

  conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
      max_batch_size=128,
      max_workspace_size_bytes=1234,
      precision_mode="INT8",
      minimum_segment_size=10,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      cached_engine_batches=[1, 128])
  rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
      conversion_params=conversion_params)

  self.assertEqual(["constfold", "layout", "constfold"],
                   rewriter_cfg.optimizers)
  self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                   rewriter_cfg.meta_optimizer_iterations)
  trt_optimizer = None
  for optimizer in rewriter_cfg.custom_optimizers:
    if optimizer.name == "TensorRTOptimizer":
      self.assertTrue(trt_optimizer is None)
      trt_optimizer = optimizer
  self.assertTrue(trt_optimizer is not None)
  for key in [
      "minimum_segment_size", "max_batch_size", "is_dynamic_op",
      "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines",
      "cached_engine_batches"
  ]:
    self.assertTrue(key in trt_optimizer.parameter_map)
  self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
  self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
  self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
  self.assertEqual(1234,
                   trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
  self.assertEqual(trt_convert._to_bytes("INT8"),
                   trt_optimizer.parameter_map["precision_mode"].s)
  self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
  self.assertEqual(
      [1, 128], trt_optimizer.parameter_map["cached_engine_batches"].list.i)
def testBackwardCompatibility(self):
  """Load and execute a model that was saved in TF 2.0."""
  if not is_tensorrt_enabled():
    return

  model_dir = test.test_src_dir_path(
      "python/compiler/tensorrt/test/testdata/tftrt_2.0_saved_model")
  saved_model_loaded = load.load(model_dir, tags=[tag_constants.SERVING])
  graph_func = saved_model_loaded.signatures[
      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

  np_input1 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
  np_input2 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
  output = graph_func(input1=np_input1, input2=np_input2)["output_0"]

  self.assertEqual(output.shape, (4, 1, 1))
  self.assertAllClose(
      np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)
def testTrtGraphConverter_BasicConversion(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
  self._WriteInputSavedModel(input_saved_model_dir)

  for need_calibration in [False, True]:
    # Use GraphDef as input.
    self._TestTrtGraphConverter()
    # Use SavedModel as input.
    output_saved_model_dir = os.path.join(
        tmp_dir, "out_dir1%s" % ("_int8" if need_calibration else ""))
    self._TestTrtGraphConverter(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        need_calibration=need_calibration)
def testTrtGraphConverter_BasicConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  # TODO(laigd): we need to use ops like conv2d so Grappler can infer the
  # shapes (at least rank) of the tensors, so we're able to build a TRT
  # engine in dynamic mode. Currently shape information is not propagated
  # from ConcreteFunction to GraphDef; need to investigate and fix it.
  class SimpleModel(tracking.AutoTrackable):

    def __init__(self):
      self.v = None

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 24, 24, 2], dtype=dtypes.float32)
    ])
    def run(self, inp):
      if self.v is None:
        self.v = variables.Variable([[[[1., 0.5, 4., 6., 0.5, 1.],
                                       [1., 0.5, 1., 1., 0.5, 1.]]]])
      conv = gen_nn_ops.conv2d(
          input=inp, filter=self.v, strides=[1, 2, 2, 1], padding="SAME")
      identity = array_ops.identity(conv)
      return identity

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir1_v2")
  root = SimpleModel()
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Convert the SavedModel and verify the result.
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir1_v2")
  self._TestTrtGraphConverter(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True)
def testTrtGraphConverter_DynamicOp(self):
  if not is_tensorrt_enabled():
    return

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      use_function_backup=False)  # Disallow fallback.

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3; since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3; since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)
def testTrtGraphConverter_DynamicConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = self.mkdtemp()
  root = self._GetModelForV2()
  expected_output = root.run(np_input1, np_input2)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(input_saved_model_dir)
  converter.convert()

  # Verify the converted GraphDef and ConcreteFunction.
  self._CheckTrtOps(converter._converted_func)  # pylint: disable=protected-access

  # Save the converted model without any TRT engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  unexpected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertFalse(os.path.exists(unexpected_asset_file))

  # Run the converted function to populate the engine cache.
  def _InputFn():
    yield np_input1, np_input2

  converter.build(input_fn=_InputFn)

  # Save the converted model again with serialized engine cache.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))

  del converter
  gc.collect()  # Force GC to destroy the TRT engine cache.

  # Load and verify the converted model.
  #
  # TODO(laigd): the name of the new input_signature of the
  # `root_with_trt.run` function is an empty string (originally was None);
  # investigate why.
  root_with_trt = load.load(output_saved_model_dir)
  # TODO(laigd): `root_with_trt.run` is still using the original graph
  # without trt. Consider changing that.
  # self._CheckTrtOps(root_with_trt.run.get_concrete_function())
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  self._CheckTrtOps(converted_signature)
  output_with_trt = converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with the V1 SavedModel signature mechanism.
  self.assertAllClose(
      expected_output,
      list(output_with_trt.values())[0],
      atol=1e-6,
      rtol=1e-6)

  del root_with_trt
  gc.collect()  # Force GC to destroy the TRT engine cache.
def testTrtGraphConverter_BasicConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

  # Create a model and save it.
  input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  root = self._GetModelForV2()
  expected_output = root.run(np_input)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = trt_convert.TrtGraphConverterV2(
      input_saved_model_dir=input_saved_model_dir,
      input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
      conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
          precision_mode=trt_convert.TrtPrecisionMode.FP32,
          is_dynamic_op=True,
          maximum_cached_engines=2,
          use_function_backup=False))
  converted_func = converter.convert()

  def _check_trt_ops(graph_def):
    trt_op_names = [
        node.name for node in graph_def.node if node.op == "TRTEngineOp"
    ]
    for func in graph_def.library.function:
      for node in func.node_def:
        if node.op == "TRTEngineOp":
          trt_op_names.append(node.name)
    self.assertEqual(1, len(trt_op_names))
    self.assertIn("TRTEngineOp_0", trt_op_names[0])

  # Verify the converted GraphDef and ConcreteFunction.
  self.assertIsInstance(converted_func, def_function.Function)
  converted_concrete_func = converted_func.get_concrete_function(
      tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32))
  _check_trt_ops(converted_concrete_func.graph.as_graph_def())

  # Save the converted model without any TRT engine cache.
  output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  converter.save(output_saved_model_dir)
  unexpected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertFalse(os.path.exists(unexpected_asset_file))

  # Run the converted function to populate the engine cache.
  output_with_trt = converted_func(np_input)
  self.assertEqual(1, len(output_with_trt))
  self.assertAllClose(
      expected_output, output_with_trt[0], atol=1e-6, rtol=1e-6)

  # Save the converted model again with serialized engine cache.
  output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))

  # Load and verify the converted model.
  #
  # TODO(laigd): the name of the new input_signature of the
  # `root_with_trt.run` function is an empty string (originally was None);
  # investigate why.
  root_with_trt = load.load(output_saved_model_dir)
  # TODO(laigd): `root_with_trt.run` is still using the original graph
  # without trt. Consider changing that.
  # _check_trt_ops(
  #     root_with_trt.run.get_concrete_function().graph.as_graph_def())
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  _check_trt_ops(converted_signature.graph.as_graph_def())
  output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with the V1 SavedModel signature mechanism.
  output_with_trt = list(output_with_trt.values())[0]
  self.assertAllClose(expected_output, output_with_trt, atol=1e-6, rtol=1e-6)
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.training import saver
from tensorflow.python.training.tracking import tracking
from tensorflow.python.util.lazy_loader import LazyLoader

# Lazily load the op, since it's not available in cpu-only builds. Importing
# this at top will cause tests that import TF-TRT to fail when they're built
# and run without CUDA/GPU.
gen_trt_ops = LazyLoader(
    "gen_trt_ops", globals(),
    "tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops")

# Register TRT ops in python, so that when users import this module they can
# execute a TRT-converted graph without calling any of the methods in this
# module.
if wrap_py_utils.is_tensorrt_enabled():
  if platform.system() == "Windows":
    raise RuntimeError("Windows platforms are not supported")

  # This will call register_op_list() in
  # tensorflow/python/framework/op_def_registry.py, but it doesn't register
  # the op or the op kernel in the C++ runtime.
  gen_trt_ops.trt_engine_op  # pylint: disable=pointless-statement


def _to_bytes(s):
  """Encode s if it is a sequence of chars."""
  if isinstance(s, _six.text_type):
    return s.encode("utf-8", errors="surrogateescape")
  return s
    if precision_mode == "INT8":
      if use_optimizer:
        # We ignore the use_optimizer option and always use TrtGraphConverter
        # for INT8 mode, so there is no need to run it twice.
        continue
      if use_calibration and not dynamic_engine:
        # A static engine with use_calibration=False is supported, so we test
        # that. If use_calibration=True, only dynamic op is supported.
        # TODO(aaroey): construction of static calibration engine is not
        # supported yet.
        continue
    else:
      if use_calibration:
        # Don't calibrate in FP32 or FP16 mode.
        continue

    conversion = "OptimizerConversion" if use_optimizer else "ToolConversion"
    engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
    calibration_type = ("UseCalibration"
                        if use_calibration else "NoCalibration")
    test_name = "%s_%s_%s_%s" % (conversion, engine_type, precision_mode,
                                 calibration_type)
    run_params = RunParams(
        use_optimizer=use_optimizer,
        precision_mode=precision_mode,
        dynamic_engine=dynamic_engine,
        test_name=test_name,
        use_calibration=use_calibration)
    setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params))


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)
  for (precision_mode, convert_online, dynamic_engine,
       use_calibration) in opts:
    conversion = "OnlineConversion" if convert_online else "OfflineConversion"
    engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
    calibration_type = ("UseCalibration"
                        if use_calibration else "NoCalibration")
    test_name = "%s_%s_%s_%s_%s" % ("testTfTrtV2" if is_v2 else "testTfTrt",
                                    conversion, engine_type, precision_mode,
                                    calibration_type)
    run_params = RunParams(
        convert_online=convert_online,
        precision_mode=precision_mode,
        dynamic_engine=dynamic_engine,
        test_name=test_name,
        use_calibration=use_calibration,
        is_v2=is_v2)
    if is_v2:
      setattr(test_class, test_name,
              test_util.run_v2_only(_GetTest(run_params)))
    else:
      setattr(test_class, test_name,
              test_util.run_v1_only("", _GetTest(run_params)))


def _AddTests(test_class):
  """Adds test methods to TfTrtIntegrationTestBase."""
  _AddTestsFor(test_class, is_v2=False)
  _AddTestsFor(test_class, is_v2=True)


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)
def testTrtGraphConverter_Int8Conversion_v2(self):
  if not is_tensorrt_enabled():
    return

  np_input1, np_input2 = self._RandomInput([4, 1, 1])

  # Create a model and save it.
  input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  root = self._GetModelForV2()
  expected_output = root.run(np_input1, np_input2)
  save.save(root, input_saved_model_dir,
            {_SAVED_MODEL_SIGNATURE_KEY: root.run})

  # Run TRT conversion.
  converter = self._CreateConverterV2(
      input_saved_model_dir,
      precision_mode=trt_convert.TrtPrecisionMode.INT8,
      maximum_cached_engines=3)

  # Convert and perform INT8 calibration.
  def _CalibrationInputFn():
    yield np_input1, np_input2

  converter.convert(calibration_input_fn=_CalibrationInputFn)

  def _CheckFn(node):
    self.assertTrue(len(node.attr["calibration_data"].s), node.name)

  # Verify the converted GraphDef.
  self._CheckTrtOps(converter._converted_func, _CheckFn)  # pylint: disable=protected-access

  # Build another engine with a different batch size.
  def _InputFn():
    yield self._RandomInput([5, 1, 1])

  converter.build(input_fn=_InputFn)

  # Save the converted model.
  # TODO(laigd): check that it should contain two engines.
  output_saved_model_dir = self.mkdtemp()
  converter.save(output_saved_model_dir)
  expected_asset_file = os.path.join(
      output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
  self.assertTrue(os.path.exists(expected_asset_file))
  self.assertTrue(os.path.getsize(expected_asset_file))

  del converter
  gc.collect()  # Force GC to destroy the TRT engine cache.

  # Load and verify the converted model.
  root_with_trt = load.load(output_saved_model_dir)
  converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
  self._CheckTrtOps(converted_signature, _CheckFn)
  output_with_trt = converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))
  self.assertEqual(1, len(output_with_trt))
  # The output of running the converted signature is a dict due to
  # compatibility reasons with the V1 SavedModel signature mechanism.
  self.assertAllClose(
      expected_output,
      list(output_with_trt.values())[0],
      atol=1e-6,
      rtol=1e-6)

  # Run with an input of different batch size. It should build a new engine
  # using the calibration table.
  # TODO(laigd): check that it should contain three engines.
  np_input1, np_input2 = self._RandomInput([6, 1, 1])
  converted_signature(
      inp1=ops.convert_to_tensor(np_input1),
      inp2=ops.convert_to_tensor(np_input2))

  del root_with_trt
  gc.collect()  # Force GC to destroy the TRT engine cache.
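# A condensed, illustrative sketch (not part of the original tests) of the
# TF 2.x workflow the V2 tests above exercise, assuming the build()-capable
# converter API used in those tests: convert, optionally pre-build engines,
# then save. The function name and the directory/input_fn arguments are
# hypothetical placeholders.
def _example_convert_v2(input_saved_model_dir, output_saved_model_dir,
                        input_fn):
  converter = trt_convert.TrtGraphConverterV2(
      input_saved_model_dir=input_saved_model_dir,
      conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
          precision_mode=trt_convert.TrtPrecisionMode.FP32,
          is_dynamic_op=True))
  converter.convert()  # Rewrites eligible segments into TRTEngineOp nodes.
  # Running build() with representative inputs populates the engine cache so
  # the engines are serialized into the SavedModel's assets directory.
  converter.build(input_fn=input_fn)
  converter.save(output_saved_model_dir)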