Example #1
    def testTrtGraphConverter_OnlineConversion(self, device):
        """Test case for TF-TRT conversion using Grappler directly."""
        if not is_tensorrt_enabled():
            return

        conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt_convert.TrtPrecisionMode.FP32,
            is_dynamic_op=True)
        config = self._GetConfigProto(
            rewriter_config=trt_convert.get_tensorrt_rewriter_config(
                conversion_params, is_v2=False))

        with ops.Graph().as_default():
            # Online conversion requires a frozen graph, so we reuse inp1 as the var
            # argument.
            inp1 = array_ops.placeholder(dtype=dtypes.float32,
                                         shape=[None, 1, 1],
                                         name="input1")
            inp2 = array_ops.placeholder(dtype=dtypes.float32,
                                         shape=[None, 1, 1],
                                         name="input2")
            if device:
                with ops.device(device):
                    TrtConvertTest._GetGraph(inp1, inp2, inp1)
            else:
                TrtConvertTest._GetGraph(inp1, inp2, inp1)
            with self.session(config=config) as sess:
                self._TestRun(sess, batch_size=1)
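
Note: the example above exercises "online" conversion, where the TensorRT rewriter runs inside Grappler at session time rather than through the converter class. Below is a minimal sketch of the same pattern outside the test harness, assuming a TensorFlow 1.x build with TensorRT enabled; the variable names are ours.

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.compiler.tensorrt import trt_convert

# Conversion parameters, mirroring the test above.
params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt_convert.TrtPrecisionMode.FP32,
    is_dynamic_op=True)

# Attach the TensorRT rewriter to a session ConfigProto; Grappler then
# rewrites eligible segments into TRTEngineOp nodes when the session
# optimizes the graph.
session_config = config_pb2.ConfigProto()
session_config.graph_options.rewrite_options.CopyFrom(
    trt_convert.get_tensorrt_rewriter_config(params, is_v2=False))
# session_config can now be passed to tf.compat.v1.Session(config=...).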
Example #2
    def _TestStaticOp(self):
        if not is_tensorrt_enabled():
            return

        input_saved_model_dir = self.mkdtemp()
        output_saved_model_dir = self.mkdtemp()
        self._WriteInputSavedModel(input_saved_model_dir)
        output_graph_def = self._ConvertGraph(
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            maximum_cached_engines=2)

        # Test the output GraphDef.
        with ops.Graph().as_default():
            importer.import_graph_def(output_graph_def, name="")
            with self.session(config=self._GetConfigProto()) as sess:
                # Run with batch size 1; the default engine embedded in the
                # graphdef will be used.
                self._TestRun(sess, 1, expect_engine_is_run=True)
                # Run with batch size 2, which exceeds the max_batch_size; it
                # should fall back to the TF function.
                self._TestRun(sess, 2, expect_engine_is_run=False)

        # Test the output SavedModel
        with ops.Graph().as_default():
            with self.session(config=self._GetConfigProto()) as sess:
                loader.load(sess, [tag_constants.SERVING],
                            output_saved_model_dir)
                # Run with batch size 1; the default engine embedded in the
                # graphdef will be used.
                self._TestRun(sess, 1, expect_engine_is_run=True)
                # Run with batch size 2, which exceeds the max_batch_size; it
                # should fall back to the TF function.
                self._TestRun(sess, 2, expect_engine_is_run=False)
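
The fallback behavior asserted above can be summarized as: a statically built engine serves batches up to its max_batch_size, and larger batches run the original TensorFlow segment instead. A conceptual sketch of that rule (pseudocode of the policy, not a TF API):

def run_segment(batch_size, max_batch_size, run_engine, run_native_segment):
    # Static engines are built for a fixed max_batch_size; a larger batch
    # cannot be served by the engine and falls back to the native TF
    # function that backs the segment.
    if batch_size <= max_batch_size:
        return run_engine(batch_size)
    return run_native_segment(batch_size)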
Example #3
  def testEval(self):
    if not is_tensorrt_enabled():
      return
    model_dir = test.test_src_dir_path('python/compiler/tensorrt/test/testdata')

    accuracy_tf_native = self._Run(
        is_training=False,
        use_trt=False,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_native: %f', accuracy_tf_native)
    self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

    if get_linked_tensorrt_version()[0] < 5:
      return

    accuracy_tf_trt = self._Run(
        is_training=False,
        use_trt=True,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
    self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
Example #4
    def testTrtGraphConverter_StaticConversion_v2(self):
        """Test case for trt_convert.TrtGraphConverter() using static mode."""
        if not is_tensorrt_enabled():
            return

        np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        expected_output = root.run(np_input)
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(input_saved_model_dir,
                                            max_batch_size=4)
        converted_func = converter.convert()

        def _CheckTrtOps(graph_def):
            trt_op_names = [
                node.name for node in graph_def.node
                if node.op == "TRTEngineOp"
            ]
            for func in graph_def.library.function:
                for node in func.node_def:
                    if node.op == "TRTEngineOp":
                        trt_op_names.append(node.name)
                        self.assertTrue(len(node.attr["serialized_segment"].s),
                                        node.name)
            self.assertEqual(1, len(trt_op_names))
            self.assertIn("TRTEngineOp_0", trt_op_names[0])

        # Verify the converted GraphDef and ConcreteFunction.
        self.assertIsInstance(converted_func, def_function.Function)
        converted_concrete_func = converted_func.get_concrete_function(
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32))
        _CheckTrtOps(converted_concrete_func.graph.as_graph_def())

        # Save the converted model with the statically-built engine inlined.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)
        unexpected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertFalse(os.path.exists(unexpected_asset_file))

        # Load and verify the converted model.
        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures[
            _SAVED_MODEL_SIGNATURE_KEY]
        _CheckTrtOps(converted_signature.graph.as_graph_def())
        output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
        # The output of running the converted signature is a dict due to
        # compatibility reasons with V1 SavedModel signature mechanism.
        output_with_trt = list(output_with_trt.values())[0]
        self.assertAllClose(expected_output,
                            output_with_trt,
                            atol=1e-6,
                            rtol=1e-6)
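
The `_CheckTrtOps` helper above generalizes to a small standalone utility: TRTEngineOp nodes may appear both in the top-level graph and inside library functions, so both must be scanned. A sketch (the function name is ours, not part of the TF API):

def collect_trt_engine_ops(graph_def):
    """Returns the names of all TRTEngineOp nodes in a GraphDef."""
    names = [node.name for node in graph_def.node if node.op == "TRTEngineOp"]
    # Converted segments may live inside library functions rather than in
    # the top-level graph, so scan those as well.
    for func in graph_def.library.function:
        names.extend(
            node.name for node in func.node_def if node.op == "TRTEngineOp")
    return names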
Example #5
    def testTrtGraphConverter_DynamicOp(self):
        if not is_tensorrt_enabled():
            return

        output_saved_model_dir = self.mkdtemp()
        output_graph_def = self._ConvertGraphV1(
            output_saved_model_dir=output_saved_model_dir,
            is_dynamic_op=True,
            maximum_cached_engines=2)

        # Test the output GraphDef.
        with ops.Graph().as_default():
            importer.import_graph_def(output_graph_def, name="")
            with self.session(config=self._GetConfigProto()) as sess:
                # Run with batch size 1; a new engine is created and cached.
                self._TestRun(sess, 1)
                # Run with batch size 2; a new engine is created and cached.
                self._TestRun(sess, 2)
                # Run with batch size 3; the number of cached engines has reached
                # the max, so an old engine is evicted and a new one is created.
                self._TestRun(sess, 3)

        # Test the output SavedModel
        with ops.Graph().as_default():
            with self.session(config=self._GetConfigProto()) as sess:
                loader.load(sess, [tag_constants.SERVING],
                            output_saved_model_dir)
                # Run with batch size 1; a new engine is created and cached.
                self._TestRun(sess, 1)
                # Run with batch size 2; a new engine is created and cached.
                self._TestRun(sess, 2)
                # Run with batch size 3; the number of cached engines has reached
                # the max, so an old engine is evicted and a new one is created.
                self._TestRun(sess, 3)
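
The caching policy exercised above amounts to: one engine per distinct batch size, bounded by maximum_cached_engines, with an old engine evicted once the bound is exceeded. A conceptual model of that cache (pseudocode, not the actual C++ implementation):

class EngineCache(object):
    """Conceptual model of the per-op TRT engine cache."""

    def __init__(self, maximum_cached_engines):
        self._max = maximum_cached_engines
        self._engines = {}  # batch_size -> engine

    def get(self, batch_size, build_engine):
        if batch_size not in self._engines:
            if len(self._engines) >= self._max:
                # Evict an old engine to make room, as the test expects
                # when a third distinct batch size arrives.
                self._engines.pop(next(iter(self._engines)))
            self._engines[batch_size] = build_engine(batch_size)
        return self._engines[batch_size]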
Example #6
  def testEval(self):
    if not is_tensorrt_enabled():
      return
    model_dir = test.test_src_dir_path('python/compiler/tensorrt/test/testdata')

    accuracy_tf_native = self._Run(
        is_training=False,
        use_trt=False,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_native: %f', accuracy_tf_native)
    self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

    if get_linked_tensorrt_version()[0] < 5:
      return

    accuracy_tf_trt = self._Run(
        is_training=False,
        use_trt=True,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
    self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
Example #7
    def testGetTensorrtRewriterConfigTemplate(self):
        """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
        if not is_tensorrt_enabled():
            return

        rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
        rewriter_config_with_trt.optimizers.extend(
            ["constfold", "layout", "constfold"])
        rewriter_config_with_trt.meta_optimizer_iterations = (
            rewriter_config_pb2.RewriterConfig.ONE)
        optimizer = rewriter_config_with_trt.custom_optimizers.add()
        rewriter_config_with_trt.custom_optimizers.add().name = "constfold"
        optimizer.name = "TensorRTOptimizer"
        optimizer.parameter_map["minimum_segment_size"].i = 10
        optimizer.parameter_map["max_batch_size"].i = 128
        optimizer.parameter_map["is_dynamic_op"].b = True
        optimizer.parameter_map["max_workspace_size_bytes"].i = 1234
        optimizer.parameter_map["precision_mode"].s = trt_convert._to_bytes(
            trt_convert.TrtPrecisionMode.INT8)
        optimizer.parameter_map["maximum_cached_engines"].i = 2
        optimizer.parameter_map["use_calibration"].b = False
        optimizer.parameter_map["use_implicit_batch"].b = True

        conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            rewriter_config_template=rewriter_config_with_trt)
        rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
            conversion_params=conversion_params)
        self.assertEqual(["constfold", "layout", "constfold"],
                         rewriter_cfg.optimizers)
        self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                         rewriter_cfg.meta_optimizer_iterations)
        trt_optimizer = None
        for optimizer in rewriter_cfg.custom_optimizers:
            if optimizer.name == "TensorRTOptimizer":
                self.assertIsNone(trt_optimizer)
                trt_optimizer = optimizer
        self.assertIsNotNone(trt_optimizer)
        for key in [
                "minimum_segment_size", "max_batch_size", "is_dynamic_op",
                "max_workspace_size_bytes", "precision_mode",
                "maximum_cached_engines"
        ]:
            self.assertIn(key, trt_optimizer.parameter_map)
        self.assertEqual(10,
                         trt_optimizer.parameter_map["minimum_segment_size"].i)
        self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
        self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
        self.assertEqual(
            1234, trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
        self.assertEqual(trt_convert._to_bytes("INT8"),
                         trt_optimizer.parameter_map["precision_mode"].s)
        self.assertEqual(
            2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
        self.assertEqual(False,
                         trt_optimizer.parameter_map["use_calibration"].b)
        self.assertEqual(True,
                         trt_optimizer.parameter_map["use_implicit_batch"].b)
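
The template mechanism tested above means that any RewriterConfig containing a custom optimizer named "TensorRTOptimizer" is honored by get_tensorrt_rewriter_config() in place of the default pipeline. A minimal sketch, assuming the internal trt_convert module; the parameter value is illustrative:

from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.compiler.tensorrt import trt_convert

template = rewriter_config_pb2.RewriterConfig()
trt_opt = template.custom_optimizers.add()
trt_opt.name = "TensorRTOptimizer"
trt_opt.parameter_map["max_batch_size"].i = 8  # illustrative value

params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    rewriter_config_template=template)
rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
    conversion_params=params)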
Example #8
    def testRetainSignatureInfo_OneOutputSignatureKey(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[])
            def run(self):
                return {"my_output": array_ops.constant(1.0)}

        self._CompareSavedModel(_Model)
Example #9
    def testRetainSignatureInfo_OneInput(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
            ])
            def run(self, inp):
                return inp + inp * inp

        self._CompareSavedModel(_Model)
Example #10
    def testTrtGraphConverter_OfflineConversion(self, device):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        for need_calibration in [False, True]:
            # Use GraphDef as input.
            self._TestTrtGraphConverter(device)

            # Use SavedModel as input.
            self._TestTrtGraphConverter(device,
                                        output_saved_model_dir=self.mkdtemp(),
                                        need_calibration=need_calibration)
Example #11
 def testTrtGraphConverter_MinimumSegmentSize(self):
   if not is_tensorrt_enabled():
     return
   output_graph_def = self._ConvertGraph(minimum_segment_size=5)
   node_name_to_op = {node.name: node.op for node in output_graph_def.node}
   self.assertEqual({
       "v1/read": "Const",
       "input": "Placeholder",
       "add": "Add",
       "mul": "Mul",
       "add_1": "Add",
       "output": "Identity"
   }, node_name_to_op)
Example #12
  def _TestStaticOp(self, use_function_backup):
    if not is_tensorrt_enabled():
      return

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = self._ConvertGraph(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        maximum_cached_engines=2,  # This is a no-op, added just for testing.
        use_function_backup=use_function_backup)

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(
            sess,
            1,
            use_function_backup=use_function_backup,
            expect_engine_is_run=True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(
            sess,
            2,
            use_function_backup=use_function_backup,
            expect_engine_is_run=False)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(
            sess,
            1,
            use_function_backup=use_function_backup,
            expect_engine_is_run=True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(
            sess,
            2,
            use_function_backup=use_function_backup,
            expect_engine_is_run=False)
Example #13
  def _TestStaticOp(self, use_function_backup):
    if not is_tensorrt_enabled():
      return

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = self._ConvertGraph(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        maximum_cached_engines=2,  # This is a no-op, added just for testing.
        use_function_backup=use_function_backup)

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(
            sess,
            1,
            use_function_backup=use_function_backup,
            expect_engine_is_run=True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(
            sess,
            2,
            use_function_backup=use_function_backup,
            expect_engine_is_run=False)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(
            sess,
            1,
            use_function_backup=use_function_backup,
            expect_engine_is_run=True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(
            sess,
            2,
            use_function_backup=use_function_backup,
            expect_engine_is_run=False)
Example #14
    def testTrtGraphConverter_DestroyEngineCache(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        np_input1, np_input2 = self._RandomInput([4, 1, 1])

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(input_saved_model_dir)
        converter.convert()

        def _InputFn():
            yield np_input1, np_input2

        converter.build(input_fn=_InputFn)  # Populate the TRT engine cache.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        def _DestroyCache():
            with ops.device("GPU:0"):
                handle = gen_trt_ops.create_trt_resource_handle(
                    resource_name="TRTEngineOp_0")
                gen_resource_variable_ops.destroy_resource_op(
                    handle, ignore_lookup_error=False)

        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _DestroyCache()

        # Load the converted model and make sure the engine cache is populated by
        # default.
        root = load.load(output_saved_model_dir)
        _DestroyCache()
        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _DestroyCache()

        # Load the converted model again and make sure the engine cache is destroyed
        # when the model goes out of scope.
        root = load.load(output_saved_model_dir)
        del root
        gc.collect()  # Force GC to destroy the TRT engine cache.
        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _DestroyCache()
Example #15
 def testTrtGraphConverter_MinimumSegmentSize(self):
   if not is_tensorrt_enabled():
     return
   output_graph_def = self._ConvertGraph(minimum_segment_size=5)
   node_name_to_op = {node.name: node.op for node in output_graph_def.node}
   self.assertEqual(
       {
           "add/ReadVariableOp": "Const",
           "input": "Placeholder",
           "add": "Add",
           "mul": "Mul",
           "add_1": "Add",
           "output": "Identity"
       }, node_name_to_op)
Example #16
    def testTrtGraphConverter_Int8Conversion_v2(self):
        if not is_tensorrt_enabled():
            return

        np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

        # Create a model and save it.
        input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        root = self._GetModelForV2()
        expected_output = root.run(np_input)
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(
            input_saved_model_dir,
            precision_mode=trt_convert.TrtPrecisionMode.INT8)
        converted_func = converter.convert()

        # Run the converted function for INT8 calibration.
        calibration_output = converted_func(np_input)
        self.assertEqual(1, len(calibration_output))
        self.assertAllClose(expected_output,
                            list(calibration_output.values())[0],
                            atol=1e-6,
                            rtol=1e-6)

        # Save the converted model again with serialized engine cache.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)
        expected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertTrue(os.path.exists(expected_asset_file))
        self.assertTrue(os.path.getsize(expected_asset_file))

        # Load and verify the converted model.
        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures[
            _SAVED_MODEL_SIGNATURE_KEY]
        output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
        self.assertEqual(1, len(output_with_trt))

        # The output of running the converted signature is a dict due to
        # compatibility reasons with V1 SavedModel signature mechanism.
        self.assertAllClose(expected_output,
                            list(output_with_trt.values())[0],
                            atol=1e-6,
                            rtol=1e-6)
Example #17
    def testTrtGraphConverter_DestroyEngineCache(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(input_saved_model_dir)
        converted_func = converter.convert()
        converted_func(np_input)  # Populate the TRT engine cache.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        def _destroy_cache():
            with ops.device("GPU:0"):
                handle = gen_trt_ops.create_trt_engine_cache_handle(
                    container=trt_convert._TRT_ENGINE_CACHE_CONTAINER_NAME,
                    resource_name="TRTEngineOp_0")
                gen_resource_variable_ops.destroy_resource_op(
                    handle, ignore_lookup_error=False)

        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _destroy_cache()

        # Load the converted model and make sure the engine cache is populated by
        # default.
        root = load.load(output_saved_model_dir)
        _destroy_cache()
        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _destroy_cache()

        # Load the converted model again and make sure the engine cache is destroyed
        # when the model goes out of scope.
        root = load.load(output_saved_model_dir)
        del root
        gc.collect()  # Force GC to destroy the TRT engine cache.
        with self.assertRaisesRegexp(errors.NotFoundError,
                                     r"Resource .* does not exist."):
            _destroy_cache()
Example #18
    def testRetainSignatureInfo_TwoOutputSignatureKeys(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
            ])
            def run(self, inp):
                # Here the keys are not ordered lexicographically on purpose.
                return {
                    "output_b": array_ops.constant(1.0),
                    "output_a": inp + inp * inp
                }

        self._CompareSavedModel(_Model)
Example #19
File: trt_ops.py Project: EEELF/ll
def load_trt_ops():
    """Load TF-TRT op libraries so if it hasn't been loaded already."""
    global _tf_trt_so

    if not is_tensorrt_enabled():
        return

    if platform.system() == "Windows":
        raise RuntimeError("Windows platforms are not supported")

    with _module_lock:
        if _tf_trt_so:
            return

        try:
            # pylint: disable=g-import-not-at-top,unused-variable
            # This will call register_op_list() in
            # tensorflow/python/framework/op_def_registry.py, but it doesn't register
            # the op or the op kernel in C++ runtime.
            from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import trt_engine_op
            # pylint: enable=g-import-not-at-top,unused-variable
        except ImportError as e:
            print(
                "**** Failed to import TF-TRT ops. This is because the binary was "
                "not built with CUDA or TensorRT enabled. ****")
            raise e

        try:
            # pylint: disable=g-import-not-at-top
            from tensorflow.python.framework import load_library
            from tensorflow.python.platform import resource_loader
            # pylint: enable=g-import-not-at-top

            # Loading the shared object will cause registration of the op and the op
            # kernel if we link TF-TRT dynamically.
            _tf_trt_so = load_library.load_op_library(
                resource_loader.get_path_to_datafile("libtftrt.so"))
        except errors.NotFoundError as e:
            no_trt_message = (
                "**** Failed to initialize TensorRT. This is either because the "
                "TensorRT installation path is not in LD_LIBRARY_PATH, or because "
                "you do not have it installed. If not installed, please go to "
                "https://developer.nvidia.com/tensorrt to download and install "
                "TensorRT ****")
            print(no_trt_message)
            raise e
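
Usage note: thanks to the module lock and the `_tf_trt_so` guard, load_trt_ops() is idempotent, so callers can invoke it unconditionally before executing a graph that contains TRTEngineOp nodes:

load_trt_ops()  # First call loads libtftrt.so (non-Windows, TRT-enabled builds).
load_trt_ops()  # Subsequent calls return immediately under the module lock.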
Example #20
    def testTrtGraphConverter_StaticConversionNotSupportedInV2(self):
        """Test case for trt_convert.TrtGraphConverter() using static mode."""
        if not is_tensorrt_enabled():
            return

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        with self.assertRaisesRegexp(
                ValueError,
                r"Option is_dynamic_op=False is not supported in TF 2.0, "
                "please set it to True instead."):
            self._CreateConverterV2(input_saved_model_dir, is_dynamic_op=False)
Example #21
    def testTrtGraphConverter_BasicConversion(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        input_saved_model_dir = self.mkdtemp()
        self._WriteInputSavedModel(input_saved_model_dir)

        for need_calibration in [False, True]:
            # Use GraphDef as input.
            self._TestTrtGraphConverter()

            # Use SavedModel as input.
            self._TestTrtGraphConverter(
                input_saved_model_dir=input_saved_model_dir,
                output_saved_model_dir=self.mkdtemp(),
                need_calibration=need_calibration)
Example #22
 def testGetTensorrtRewriterConfig(self):
     """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
     if not is_tensorrt_enabled():
         return
     conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
         max_batch_size=128,
         max_workspace_size_bytes=1234,
         precision_mode="INT8",
         minimum_segment_size=10,
         is_dynamic_op=True,
         maximum_cached_engines=2,
         cached_engine_batches=[1, 128])
     rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
         conversion_params=conversion_params)
     self.assertEqual(["constfold", "layout", "constfold"],
                      rewriter_cfg.optimizers)
     self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                      rewriter_cfg.meta_optimizer_iterations)
     trt_optimizer = None
     for optimizer in rewriter_cfg.custom_optimizers:
         if optimizer.name == "TensorRTOptimizer":
             self.assertTrue(trt_optimizer is None)
             trt_optimizer = optimizer
     self.assertTrue(trt_optimizer is not None)
     for key in [
             "minimum_segment_size", "max_batch_size", "is_dynamic_op",
             "max_workspace_size_bytes", "precision_mode",
             "maximum_cached_engines", "cached_engine_batches"
     ]:
         self.assertTrue(key in trt_optimizer.parameter_map)
     self.assertEqual(10,
                      trt_optimizer.parameter_map["minimum_segment_size"].i)
     self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
     self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
     self.assertEqual(
         1234, trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
     self.assertEqual(trt_convert._to_bytes("INT8"),
                      trt_optimizer.parameter_map["precision_mode"].s)
     self.assertEqual(
         2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
     self.assertEqual(
         [1, 128],
         trt_optimizer.parameter_map["cached_engine_batches"].list.i)
Example #23
    def testBackwardCompatibility(self):
        """Load and execute a model that was saved in TF2.0."""
        if not is_tensorrt_enabled():
            return

        model_dir = test.test_src_dir_path(
            "python/compiler/tensorrt/test/testdata/tftrt_2.0_saved_model")
        saved_model_loaded = load.load(model_dir, tags=[tag_constants.SERVING])
        graph_func = saved_model_loaded.signatures[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

        np_input1 = ops.convert_to_tensor(
            np.ones([4, 1, 1]).astype(np.float32))
        np_input2 = ops.convert_to_tensor(
            np.ones([4, 1, 1]).astype(np.float32))
        output = graph_func(input1=np_input1, input2=np_input2)["output_0"]

        self.assertEqual(output.shape, (4, 1, 1))
        self.assertAllClose(
            np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)
Example #24
    def testTrtGraphConverter_BasicConversion(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        tmp_dir = self.get_temp_dir()
        input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
        self._WriteInputSavedModel(input_saved_model_dir)

        for need_calibration in [False, True]:
            # Use GraphDef as input.
            self._TestTrtGraphConverter()

            # Use SavedModel as input.
            output_saved_model_dir = os.path.join(
                tmp_dir, "out_dir1%s" % ("_int8" if need_calibration else ""))
            self._TestTrtGraphConverter(
                input_saved_model_dir=input_saved_model_dir,
                output_saved_model_dir=output_saved_model_dir,
                need_calibration=need_calibration)
Example #25
  def testTrtGraphConverter_BasicConversion(self):
    """Test case for trt_convert.TrtGraphConverter()."""
    if not is_tensorrt_enabled():
      return

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
    self._WriteInputSavedModel(input_saved_model_dir)

    for need_calibration in [False, True]:
      # Use GraphDef as input.
      self._TestTrtGraphConverter()

      # Use SavedModel as input.
      output_saved_model_dir = os.path.join(
          tmp_dir, "out_dir1%s" % ("_int8" if need_calibration else ""))
      self._TestTrtGraphConverter(
          input_saved_model_dir=input_saved_model_dir,
          output_saved_model_dir=output_saved_model_dir,
          need_calibration=need_calibration)
Example #26
 def testGetTensorrtRewriterConfig(self):
   """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
   if not is_tensorrt_enabled():
     return
   conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
       max_batch_size=128,
       max_workspace_size_bytes=1234,
       precision_mode="INT8",
       minimum_segment_size=10,
       is_dynamic_op=True,
       maximum_cached_engines=2,
       cached_engine_batches=[1, 128])
   rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
       conversion_params=conversion_params)
   self.assertEqual(["constfold", "layout", "constfold"],
                    rewriter_cfg.optimizers)
   self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                    rewriter_cfg.meta_optimizer_iterations)
   trt_optimizer = None
   for optimizer in rewriter_cfg.custom_optimizers:
     if optimizer.name == "TensorRTOptimizer":
       self.assertTrue(trt_optimizer is None)
       trt_optimizer = optimizer
   self.assertTrue(trt_optimizer is not None)
   for key in [
       "minimum_segment_size", "max_batch_size", "is_dynamic_op",
       "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines",
       "cached_engine_batches"
   ]:
     self.assertTrue(key in trt_optimizer.parameter_map)
   self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
   self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
   self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
   self.assertEqual(1234,
                    trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
   self.assertEqual(
       trt_convert._to_bytes("INT8"),
       trt_optimizer.parameter_map["precision_mode"].s)
   self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
   self.assertEqual(
       [1, 128], trt_optimizer.parameter_map["cached_engine_batches"].list.i)
Example #27
    def testTrtGraphConverter_BasicConversion_v2(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        # TODO(laigd): we need to use ops like conv2d so Grappler can infer the
        # shapes (at least rank) of the tensors, so we're able to build a TRT
        # engine in dynamic mode. Currently shape information is not propagated
        # from ConcreteFunction to GraphDef; this needs investigation and a fix.
        class SimpleModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, 24, 24, 2],
                                       dtype=dtypes.float32)
            ])
            def run(self, inp):
                if self.v is None:
                    self.v = variables.Variable([[[[1., 0.5, 4., 6., 0.5, 1.],
                                                   [1., 0.5, 1., 1., 0.5,
                                                    1.]]]])
                conv = gen_nn_ops.conv2d(input=inp,
                                         filter=self.v,
                                         strides=[1, 2, 2, 1],
                                         padding="SAME")
                identity = array_ops.identity(conv)
                return identity

        tmp_dir = self.get_temp_dir()
        input_saved_model_dir = os.path.join(tmp_dir, "in_dir1_v2")
        root = SimpleModel()
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Convert the SavedModel and verify the result.
        output_saved_model_dir = os.path.join(tmp_dir, "out_dir1_v2")
        self._TestTrtGraphConverter(
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            is_dynamic_op=True)
Example #28
    def testTrtGraphConverter_DynamicOp(self):
        if not is_tensorrt_enabled():
            return

        tmp_dir = self.get_temp_dir()
        input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
        output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
        self._WriteInputSavedModel(input_saved_model_dir)
        output_graph_def = self._ConvertGraph(
            input_saved_model_dir=input_saved_model_dir,
            output_saved_model_dir=output_saved_model_dir,
            is_dynamic_op=True,
            maximum_cached_engines=2,
            use_function_backup=False)  # Disallow fallback.

        # Test the output GraphDef.
        with ops.Graph().as_default():
            importer.import_graph_def(output_graph_def, name="")
            with self.session(config=self._GetConfigProto()) as sess:
                # Run with batch size 1; a new engine is created and cached.
                self._TestRun(sess, 1)
                # Run with batch size 2; a new engine is created and cached.
                self._TestRun(sess, 2)
                # Run with batch size 3; the number of cached engines has reached
                # the max, so an old engine is evicted and a new one is created.
                self._TestRun(sess, 3)

        # Test the output SavedModel
        with ops.Graph().as_default():
            with self.session(config=self._GetConfigProto()) as sess:
                loader.load(sess, [tag_constants.SERVING],
                            output_saved_model_dir)
                # Run with batch size 1; a new engine is created and cached.
                self._TestRun(sess, 1)
                # Run with batch size 2; a new engine is created and cached.
                self._TestRun(sess, 2)
                # Run with batch size 3; the number of cached engines has reached
                # the max, so an old engine is evicted and a new one is created.
                self._TestRun(sess, 3)
Example #29
  def testTrtGraphConverter_BasicConversion_v2(self):
    """Test case for trt_convert.TrtGraphConverter()."""
    if not is_tensorrt_enabled():
      return

    # TODO(laigd): we need to use ops like conv2d so Grappler can infer the
    # shapes (at least rank) of the tensors, so we're able to build a TRT
    # engine in dynamic mode. Currently shape information is not propagated
    # from ConcreteFunction to GraphDef; this needs investigation and a fix.
    class SimpleModel(tracking.AutoTrackable):

      def __init__(self):
        self.v = None

      @def_function.function(input_signature=[
          tensor_spec.TensorSpec(shape=[None, 24, 24, 2], dtype=dtypes.float32)
      ])
      def run(self, inp):
        if self.v is None:
          self.v = variables.Variable([[[[1., 0.5, 4., 6., 0.5, 1.],
                                         [1., 0.5, 1., 1., 0.5, 1.]]]])
        conv = gen_nn_ops.conv2d(
            input=inp, filter=self.v, strides=[1, 2, 2, 1], padding="SAME")
        identity = array_ops.identity(conv)
        return identity

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir1_v2")
    root = SimpleModel()
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Convert the SavedModel and verify the result.
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir1_v2")
    self._TestTrtGraphConverter(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        is_dynamic_op=True)
Example #30
  def testTrtGraphConverter_DynamicOp(self):
    if not is_tensorrt_enabled():
      return

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = self._ConvertGraph(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        is_dynamic_op=True,
        maximum_cached_engines=2,
        use_function_backup=False)  # Disallow fallback.

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1; a new engine is created and cached.
        self._TestRun(sess, 1)
        # Run with batch size 2; a new engine is created and cached.
        self._TestRun(sess, 2)
        # Run with batch size 3; the number of cached engines has reached
        # the max, so an old engine is evicted and a new one is created.
        self._TestRun(sess, 3)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1; a new engine is created and cached.
        self._TestRun(sess, 1)
        # Run with batch size 2; a new engine is created and cached.
        self._TestRun(sess, 2)
        # Run with batch size 3; the number of cached engines has reached
        # the max, so an old engine is evicted and a new one is created.
        self._TestRun(sess, 3)
Example #31
    def testTrtGraphConverter_DynamicConversion_v2(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        np_input1, np_input2 = self._RandomInput([4, 1, 1])

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        expected_output = root.run(np_input1, np_input2)
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(input_saved_model_dir)
        converter.convert()

        # Verify the converted GraphDef and ConcreteFunction.
        self._CheckTrtOps(converter._converted_func)  # pylint: disable=protected-access

        # Save the converted model without any TRT engine cache.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)
        unexpected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertFalse(os.path.exists(unexpected_asset_file))

        # Run the converted function to populate the engine cache.
        def _InputFn():
            yield np_input1, np_input2

        converter.build(input_fn=_InputFn)

        # Save the converted model again with serialized engine cache.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)
        expected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertTrue(os.path.exists(expected_asset_file))
        self.assertTrue(os.path.getsize(expected_asset_file))

        del converter
        gc.collect()  # Force GC to destroy the TRT engine cache.

        # Load and verify the converted model.
        #
        # TODO(laigd): the name of the new input_signature of the
        # `root_with_trt.run` function is the empty string (originally was
        # None); investigate why.
        root_with_trt = load.load(output_saved_model_dir)
        # TODO(laigd): `root_with_trt.run` is still using the original graph without
        # trt. Consider changing that.
        # self._CheckTrtOps(root_with_trt.run.get_concrete_function())
        converted_signature = root_with_trt.signatures[
            _SAVED_MODEL_SIGNATURE_KEY]
        self._CheckTrtOps(converted_signature)
        output_with_trt = converted_signature(
            inp1=ops.convert_to_tensor(np_input1),
            inp2=ops.convert_to_tensor(np_input2))
        # The output of running the converted signature is a dict due to
        # compatibility reasons with V1 SavedModel signature mechanism.
        self.assertAllClose(expected_output,
                            list(output_with_trt.values())[0],
                            atol=1e-6,
                            rtol=1e-6)

        del root_with_trt
        gc.collect()  # Force GC to destroy the TRT engine cache.
Example #32
    def testTrtGraphConverter_BasicConversion_v2(self):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

        # Create a model and save it.
        input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        root = self._GetModelForV2()
        expected_output = root.run(np_input)
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
            conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS.
            _replace(precision_mode=trt_convert.TrtPrecisionMode.FP32,
                     is_dynamic_op=True,
                     maximum_cached_engines=2,
                     use_function_backup=False))
        converted_func = converter.convert()

        def _check_trt_ops(graph_def):
            trt_op_names = [
                node.name for node in graph_def.node
                if node.op == "TRTEngineOp"
            ]
            for func in graph_def.library.function:
                for node in func.node_def:
                    if node.op == "TRTEngineOp":
                        trt_op_names.append(node.name)
            self.assertEqual(1, len(trt_op_names))
            self.assertIn("TRTEngineOp_0", trt_op_names[0])

        # Verify the converted GraphDef and ConcreteFunction.
        self.assertIsInstance(converted_func, def_function.Function)
        converted_concrete_func = converted_func.get_concrete_function(
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32))
        _check_trt_ops(converted_concrete_func.graph.as_graph_def())

        # Save the converted model without any TRT engine cache.
        output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        converter.save(output_saved_model_dir)
        unexpected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertFalse(os.path.exists(unexpected_asset_file))

        # Run the converted function to populate the engine cache.
        output_with_trt = converted_func(np_input)
        self.assertEqual(1, len(output_with_trt))
        self.assertAllClose(expected_output,
                            output_with_trt[0],
                            atol=1e-6,
                            rtol=1e-6)

        # Save the converted model again with serialized engine cache.
        output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        converter.save(output_saved_model_dir)
        expected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertTrue(os.path.exists(expected_asset_file))
        self.assertTrue(os.path.getsize(expected_asset_file))

        # Load and verify the converted model.
        #
        # TODO(laigd): the name of the new input_signature of the
        # `root_with_trt.run` function is the empty string (originally was
        # None); investigate why.
        root_with_trt = load.load(output_saved_model_dir)
        # TODO(laigd): `root_with_trt.run` is still using the original graph without
        # trt. Consider changing that.
        # _check_trt_ops(
        #     root_with_trt.run.get_concrete_function().graph.as_graph_def())
        converted_signature = root_with_trt.signatures[
            _SAVED_MODEL_SIGNATURE_KEY]
        _check_trt_ops(converted_signature.graph.as_graph_def())
        output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
        # The output of running the converted signature is a dict due to
        # compatibility reasons with V1 SavedModel signature mechanism.
        output_with_trt = output_with_trt[list(output_with_trt.keys())[0]]
        self.assertAllClose(expected_output,
                            output_with_trt,
                            atol=1e-6,
                            rtol=1e-6)
Example #33
  def testTrtGraphConverter_BasicConversion_v2(self):
    """Test case for trt_convert.TrtGraphConverter()."""
    if not is_tensorrt_enabled():
      return

    np_input = np.random.random_sample([4, 1, 1]).astype(np.float32)

    # Create a model and save it.
    input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    root = self._GetModelForV2()
    expected_output = root.run(np_input)
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Run TRT conversion.
    converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
        conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt_convert.TrtPrecisionMode.FP32,
            is_dynamic_op=True,
            maximum_cached_engines=2,
            use_function_backup=False))
    converted_func = converter.convert()

    def _check_trt_ops(graph_def):
      trt_op_names = [
          node.name for node in graph_def.node if node.op == "TRTEngineOp"
      ]
      for func in graph_def.library.function:
        for node in func.node_def:
          if node.op == "TRTEngineOp":
            trt_op_names.append(node.name)
      self.assertEqual(1, len(trt_op_names))
      self.assertIn("TRTEngineOp_0", trt_op_names[0])

    # Verify the converted GraphDef and ConcreteFunction.
    self.assertIsInstance(converted_func, def_function.Function)
    converted_concrete_func = converted_func.get_concrete_function(
        tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32))
    _check_trt_ops(converted_concrete_func.graph.as_graph_def())

    # Save the converted model without any TRT engine cache.
    output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    converter.save(output_saved_model_dir)
    unexpected_asset_file = os.path.join(
        output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
    self.assertFalse(os.path.exists(unexpected_asset_file))

    # Run the converted function to populate the engine cache.
    output_with_trt = converted_func(np_input)
    self.assertEqual(1, len(output_with_trt))
    self.assertAllClose(
        expected_output, output_with_trt[0], atol=1e-6, rtol=1e-6)

    # Save the converted model again with serialized engine cache.
    output_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    converter.save(output_saved_model_dir)
    expected_asset_file = os.path.join(
        output_saved_model_dir, "assets/trt-serialized-engine.TRTEngineOp_0")
    self.assertTrue(os.path.exists(expected_asset_file))
    self.assertTrue(os.path.getsize(expected_asset_file))

    # Load and verify the converted model.
    #
    # TODO(laigd): the name of the new input_signature of the
    # `root_with_trt.run` function is the empty string (originally was
    # None); investigate why.
    root_with_trt = load.load(output_saved_model_dir)
    # TODO(laigd): `root_with_trt.run` is still using the original graph without
    # trt. Consider changing that.
    # _check_trt_ops(
    #     root_with_trt.run.get_concrete_function().graph.as_graph_def())
    converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
    _check_trt_ops(converted_signature.graph.as_graph_def())
    output_with_trt = converted_signature(ops.convert_to_tensor(np_input))
    # The output of running the converted signature is a dict due to
    # compatibility reasons with V1 SavedModel signature mechanism.
    output_with_trt = output_with_trt[list(output_with_trt.keys())[0]]
    self.assertAllClose(expected_output, output_with_trt, atol=1e-6, rtol=1e-6)
Example #34
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.training import saver
from tensorflow.python.training.tracking import tracking
from tensorflow.python.util.lazy_loader import LazyLoader

# Lazily load the op, since it's not available in cpu-only builds. Importing
# this at the top will cause tests that import TF-TRT to fail when they're
# built and run without CUDA/GPU.
gen_trt_ops = LazyLoader(
    "gen_trt_ops", globals(),
    "tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops")

# Register TRT ops in python, so that when users import this module they can
# execute a TRT-converted graph without calling any of the methods in this
# module.
if wrap_py_utils.is_tensorrt_enabled():
  if platform.system() == "Windows":
    raise RuntimeError("Windows platform is not supported")

  # This will call register_op_list() in
  # tensorflow/python/framework/op_def_registry.py, but it doesn't register
  # the op or the op kernel in C++ runtime.
  gen_trt_ops.trt_engine_op  # pylint: disable=pointless-statement


def _to_bytes(s):
  """Encode s if it is a sequence of chars."""
  if isinstance(s, _six.text_type):
    return s.encode("utf-8", errors="surrogateescape")
  return s
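
A quick illustration of _to_bytes under Python 3: text is encoded to UTF-8, while byte strings pass through unchanged.

assert _to_bytes(u"INT8") == b"INT8"
assert _to_bytes(b"INT8") == b"INT8"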
Example #35
        # We ignore the use_optimizer option and always use TrtGraphConverter
        # for INT8 mode, so no need to run it twice.
        continue
      if use_calibration and not dynamic_engine:
        # Static engines with use_calibration=False are supported, so we test
        # that case. If use_calibration=True, only dynamic op is supported.
        # TODO(aaroey): construction of static calibration engine is not
        # supported yet.
        continue
    else:
      if use_calibration:
        # Don't calibrate in FP32 or FP16 mode
        continue

    conversion = "OptimizerConversion" if use_optimizer else "ToolConversion"
    engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
    calibration_type = "UseCalibration" if use_calibration else "NoCalibration"
    test_name = "%s_%s_%s_%s" % (conversion, engine_type, precision_mode,
                                 calibration_type)
    run_params = RunParams(
        use_optimizer=use_optimizer,
        precision_mode=precision_mode,
        dynamic_engine=dynamic_engine,
        test_name=test_name,
        use_calibration=use_calibration)
    setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params))


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)
Example #36
  for (precision_mode, convert_online, dynamic_engine, use_calibration) in opts:
    conversion = "OnlineConversion" if convert_online else "OfflineConversion"
    engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
    calibration_type = "UseCalibration" if use_calibration else "NoCalibration"
    test_name = "%s_%s_%s_%s_%s" % ("testTfTrtV2" if is_v2 else "testTfTrt",
                                    conversion, engine_type, precision_mode,
                                    calibration_type)
    run_params = RunParams(
        convert_online=convert_online,
        precision_mode=precision_mode,
        dynamic_engine=dynamic_engine,
        test_name=test_name,
        use_calibration=use_calibration,
        is_v2=is_v2)
    if is_v2:
      setattr(test_class, test_name,
              test_util.run_v2_only(_GetTest(run_params)))
    else:
      setattr(test_class, test_name,
              test_util.run_v1_only("", _GetTest(run_params)))


def _AddTests(test_class):
  """Adds test methods to TfTrtIntegrationTestBase."""
  _AddTestsFor(test_class, is_v2=False)
  _AddTestsFor(test_class, is_v2=True)


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)
Example #37
    def testTrtGraphConverter_Int8Conversion_v2(self):
        if not is_tensorrt_enabled():
            return

        np_input1, np_input2 = self._RandomInput([4, 1, 1])

        # Create a model and save it.
        input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        root = self._GetModelForV2()
        expected_output = root.run(np_input1, np_input2)
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        # Run TRT conversion.
        converter = self._CreateConverterV2(
            input_saved_model_dir,
            precision_mode=trt_convert.TrtPrecisionMode.INT8,
            maximum_cached_engines=3)

        # Convert and perform INT8 calibration
        def _CalibrationInputFn():
            yield np_input1, np_input2

        converter.convert(calibration_input_fn=_CalibrationInputFn)

        def _CheckFn(node):
            self.assertTrue(len(node.attr["calibration_data"].s), node.name)

        # Verify the converted GraphDef.
        self._CheckTrtOps(converter._converted_func, _CheckFn)  # pylint: disable=protected-access

        # Build another engine with different batch size.
        def _InputFn():
            yield self._RandomInput([5, 1, 1])

        converter.build(input_fn=_InputFn)

        # Save the converted model.
        # TODO(laigd): check that it should contain two engines.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)
        expected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine.TRTEngineOp_0")
        self.assertTrue(os.path.exists(expected_asset_file))
        self.assertTrue(os.path.getsize(expected_asset_file))

        del converter
        gc.collect()  # Force GC to destroy the TRT engine cache.

        # Load and verify the converted model.
        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures[
            _SAVED_MODEL_SIGNATURE_KEY]
        self._CheckTrtOps(converted_signature, _CheckFn)
        output_with_trt = converted_signature(
            inp1=ops.convert_to_tensor(np_input1),
            inp2=ops.convert_to_tensor(np_input2))
        self.assertEqual(1, len(output_with_trt))
        # The output of running the converted signature is a dict due to
        # compatibility reasons with V1 SavedModel signature mechanism.
        self.assertAllClose(expected_output,
                            list(output_with_trt.values())[0],
                            atol=1e-6,
                            rtol=1e-6)

        # Run with an input of different batch size. It should build a new engine
        # using calibration table.
        # TODO(laigd): check that it should contain three engines.
        np_input1, np_input2 = self._RandomInput([6, 1, 1])
        converted_signature(inp1=ops.convert_to_tensor(np_input1),
                            inp2=ops.convert_to_tensor(np_input2))

        del root_with_trt
        gc.collect()  # Force GC to destroy the TRT engine cache.
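
Putting Example #37 together, the V2 INT8 workflow is: convert with a calibration input function, optionally build engines for additional input shapes, then save. A condensed, hedged sketch of that flow based on the calls shown above (directory paths are placeholders):

import numpy as np
from tensorflow.python.compiler.tensorrt import trt_convert

params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt_convert.TrtPrecisionMode.INT8,
    maximum_cached_engines=3)
converter = trt_convert.TrtGraphConverterV2(
    input_saved_model_dir="/tmp/in_dir",  # placeholder path
    conversion_params=params)

def calibration_input_fn():
    # Yield representative inputs; calibration statistics are gathered by
    # running the converted function on them.
    yield (np.random.random_sample([4, 1, 1]).astype(np.float32),)

converter.convert(calibration_input_fn=calibration_input_fn)

def build_input_fn():
    # Optionally pre-build an engine for another input shape.
    yield (np.random.random_sample([5, 1, 1]).astype(np.float32),)

converter.build(input_fn=build_input_fn)
converter.save("/tmp/out_dir")  # placeholder path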