Example #1
def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False):
    """Convert an existing calibration graph to inference graph.

  Args:
    calibration_graph_def: the calibration GraphDef object with calibration data
    is_dynamic_op: whether to create dynamic static engines from calibration

  Returns:
    New GraphDef with TRTEngineOps placed in graph replacing calibration nodes.
  Raises:
    RuntimeError: if the returned status message is malformed.
  """
    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,line-too-long
    from tensorflow.python.compiler.tensorrt.wrap_conversion import calib_convert
    # pylint: enable=g-import-not-at-top,line-too-long

    is_calib_graph = False
    for n in calibration_graph_def.node:
        if n.op == "TRTEngineOp":
            is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s
    if not is_calib_graph:
        tf_logging.error(
            "Not a calib graph. Doesn't seem to contain any calibration nodes."
        )
        return None
    graph_str = calibration_graph_def.SerializeToString()
    out = calib_convert(graph_str, is_dynamic_op)
    status = _to_string(out[0])
    output_graph_def_string = out[1]
    del graph_str  # Save some memory
    if len(status) < 2:
        raise _impl.UnknownError(None, None, status)
    if status[:2] != "OK":
        msg = status.split(";")
        if len(msg) == 1:
            raise RuntimeError("Status message is malformed {}".format(status))
        # pylint: disable=protected-access
        raise _impl._make_specific_exception(None, None, ";".join(msg[1:]),
                                             int(msg[0]))
        # pylint: enable=protected-access
    output_graph_def = graph_pb2.GraphDef()
    output_graph_def.ParseFromString(output_graph_def_string)
    del output_graph_def_string  # Save some memory
    return output_graph_def
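
A minimal usage sketch around calib_graph_to_infer_graph, assuming a TF 1.x build with TF-TRT and TensorRT available; the frozen GraphDef, tensor names, and calibration batches are hypothetical placeholders, and create_inference_graph is the companion API expected in the same trt_convert module.

import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Step 1: build a calibration graph (TRTEngineOps without calibration data yet).
calib_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph_def,   # hypothetical frozen GraphDef
    outputs=output_names,               # hypothetical list like ["logits:0"]
    precision_mode="INT8",
    is_dynamic_op=False)

# Step 2: run representative batches through it so TRT can record ranges.
with tf.Graph().as_default():
    fetches = tf.import_graph_def(
        calib_graph, return_elements=output_names, name="")
    with tf.Session() as sess:
        for batch in calibration_batches:                    # hypothetical iterable
            sess.run(fetches, feed_dict={"input:0": batch})  # hypothetical input name

# Step 3: replace the calibration nodes with static TRT engines.
infer_graph = trt.calib_graph_to_infer_graph(calib_graph, is_dynamic_op=False)
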
Example #2
  def __init__(self,
               input_saved_model_dir=None,
               input_saved_model_tags=None,
               input_saved_model_signature_key=None,
               input_graph_def=None,
               nodes_blacklist=None,
               session_config=None,
               max_batch_size=1,
               max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
               precision_mode=TrtPrecisionMode.FP32,
               minimum_segment_size=3,
               is_dynamic_op=False,
               maximum_cached_engines=1,
               cached_engine_batches=None,
               use_calibration=True,
               use_function_backup=True):
    """Initialize the converter.

    Args:
      input_saved_model_dir: the directory to load the SavedModel which contains
        the input graph to transform. Used only when input_graph_def is None.
      input_saved_model_tags: list of tags to load the SavedModel.
      input_saved_model_signature_key: the key of the signature to optimize the
        graph for.
      input_graph_def: a GraphDef object containing a model to be transformed.
        If set to None, the graph will be read from the SavedModel loaded from
        input_saved_model_dir.
      nodes_blacklist: list of node names to prevent the converter from
        touching. Only used when input_graph_def is not None.
      session_config: the ConfigProto used to create a Session. It's also used
        as a template to create a TRT-enabled ConfigProto for conversion. If not
        specified, a default ConfigProto will be used.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      cached_engine_batches: a list of batch sizes used to create cached
        engines, only used when is_dynamic_op is True. The length of the list
        should be <= maximum_cached_engines, and the dynamic TRT op will use
        this list to determine the batch sizes of the cached engines, instead of
        making the decision on the fly. This is useful when we know the most
        common batch size(s) the application is going to generate.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph by running calibration with calibrate(). If set to False,
        quantization nodes will be expected for every tensor in the graph
        (excluding those which will be fused). If a range is missing, an error
        will occur. Please note that accuracy may be negatively affected if
        there is a mismatch between which tensors TRT quantizes and which
        tensors were trained with fake quantization.
      use_function_backup: if set to True, it will create a FunctionDef for each
        subgraph that is converted to TRT op, and if TRT ops fail to execute at
        runtime, it'll invoke that function as a fallback.

    Raises:
      ValueError: if the combination of the parameters is invalid.
      RuntimeError: if the TensorRT library version is incompatible.
    """
    super(TrtGraphConverter, self).__init__(
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        input_saved_model_signature_key=input_saved_model_signature_key,
        input_graph_def=input_graph_def,
        nodes_blacklist=nodes_blacklist,
        session_config=session_config)

    # TODO(laigd): move all the validations below to
    # get_tensorrt_rewriter_config().

    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,line-too-long
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_loaded_tensorrt_version
    # pylint: enable=g-import-not-at-top,line-too-long

    # Check compatibility of TensorRT version.
    compiled_version = get_linked_tensorrt_version()
    loaded_version = get_loaded_tensorrt_version()
    tf_logging.info("Linked TensorRT version: %s" % str(compiled_version))
    tf_logging.info("Loaded TensorRT version: %s" % str(loaded_version))
    version_mismatch = False
    if loaded_version[0] < compiled_version[0]:
      tf_logging.error(
          "TensorRT version mismatch. TensorFlow was compiled against " +
          "TensorRT %s but library loaded from environment is TensorRT %s" %
          (".".join([str(x) for x in compiled_version]),
           ".".join([str(x) for x in loaded_version])) +
          ". Please make sure that the correct version of TensorRT " +
          "is available in the system and added to ldconfig or LD_LIBRARY_PATH")
      raise RuntimeError("Incompatible TensorRT library version")
    for i in zip(loaded_version, compiled_version):
      if i[0] != i[1]:
        tf_logging.warn("TensorRT mismatch. Compiled against version " +
                        "%s, but loaded %s. Things may not work" %
                        (".".join([str(x) for x in compiled_version]),
                         ".".join([str(x) for x in loaded_version])))
        version_mismatch = True
        break
    if not version_mismatch:
      tf_logging.info("Running against TensorRT version %s" %
                      ".".join([str(x) for x in loaded_version]))

    # Check input arguments.
    supported_precision_modes = TrtPrecisionMode.supported_precision_modes()
    if precision_mode not in supported_precision_modes:
      raise ValueError(("precision mode '{}' is not supported."
                        "It should be one of {}").format(
                            precision_mode, supported_precision_modes))

    if cached_engine_batches:
      if not isinstance(cached_engine_batches, list):
        raise TypeError("cached_engine_batches should be a list.")
      if len(cached_engine_batches) > maximum_cached_engines:
        raise ValueError("cached_engine_batches should not contain more than "
                         "maximum_cached_engines items.")

    self._need_calibration = (
        precision_mode == TrtPrecisionMode.INT8 and use_calibration)
    self._use_function_backup = use_function_backup

    # TODO(laigd): consider providing a mechanism to remove the fallback path
    # after calibration is done.
    if self._need_calibration and not use_function_backup:
      raise ValueError(
          "Calibration requires enabling fallback to TF function execution.")

    # TODO(laigd):
    # - Get rid of is_dynamic_op option, it should always be True, and it should
    #   accept N shapes as input.
    # - Verify in int8 mode that maximum_cached_engines and
    #   cached_engine_batches are set appropriately.
    # - If it fails to build the int8 engine it should return an error.
    self._max_batch_size = max_batch_size
    self._max_workspace_size_bytes = max_workspace_size_bytes
    self._precision_mode = precision_mode
    self._minimum_segment_size = minimum_segment_size
    self._is_dynamic_op = is_dynamic_op
    self._maximum_cached_engines = maximum_cached_engines
    self._cached_engine_batches = cached_engine_batches
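
A minimal usage sketch of the INT8 calibration path through this constructor, assuming the converter also exposes convert(), calibrate() (referenced in the use_calibration docstring above), and save() as in TF 1.x; the SavedModel paths, tensor names, calibrate() keyword arguments, and the batch generator are hypothetical.

from tensorflow.python.compiler.tensorrt import trt_convert as trt

converter = trt.TrtGraphConverter(
    input_saved_model_dir="/tmp/my_saved_model",  # hypothetical path
    precision_mode=trt.TrtPrecisionMode.INT8,
    use_calibration=True,        # build a calibration graph first
    use_function_backup=True,    # required while calibration is enabled (see check above)
    is_dynamic_op=True,
    maximum_cached_engines=2)
converter.convert()

# Feed representative data so TRT can fill in the missing ranges; the keyword
# names below are assumptions, not confirmed by the snippet above.
converter.calibrate(
    fetch_names=["logits:0"],                               # hypothetical output tensor
    num_runs=10,
    feed_dict_fn=lambda: {"input:0": next_calib_batch()})   # hypothetical helper
converter.save("/tmp/my_saved_model_trt_int8")              # hypothetical output path
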
Example #3
  def get_tensorrt_rewriter_config(
      cls,
      rewriter_config_template=None,
      max_batch_size=1,
      max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
      precision_mode=TrtPrecisionMode.FP32,
      minimum_segment_size=3,
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=None,
      use_calibration=True,
      use_function_backup=True):
    """Returns a RewriterConfig proto for TRT transformation.

    Args:
      rewriter_config_template: a template RewriterConfig proto used to create a
        TRT-enabled RewriterConfig. If None, it will use a default one.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      cached_engine_batches: a list of batch sizes used to create cached
        engines, only used when is_dynamic_op is True. The length of the list
        should be <= maximum_cached_engines, and the dynamic TRT op will use
        this list to determine the batch sizes of the cached engines, instead of
        making the decision on the fly. This is useful when we know the most
        common batch size(s) the application is going to generate.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph by running calibration with calibrate(). If set to False,
        quantization nodes will be expected for every tensor in the graph
        (excluding those which will be fused). If a range is missing, an error
        will occur. Please note that accuracy may be negatively affected if
        there is a mismatch between which tensors TRT quantizes and which
        tensors were trained with fake quantization.
      use_function_backup: if set to True, it will create a FunctionDef for each
        subgraph that is converted to TRT op, and if TRT ops fail to execute at
        runtime, it'll invoke that function as a fallback.

    Returns:
      A RewriterConfig proto which sets a TensorRTOptimizer to run Grappler.

    Raises:
      TypeError: if any of the parameters are of unexpected type.
      ValueError: if any of the parameters are of unexpected value.
    """
    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,unused-import,line-too-long,unused-variable
    # Import a random symbol to trigger loading of TRT library.
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
    # pylint: enable=g-import-not-at-top,unused-import,line-too-long,unused-variable

    if rewriter_config_template is not None and not isinstance(
        rewriter_config_template, rewriter_config_pb2.RewriterConfig):
      raise TypeError(
          "rewriter_config_template should be a RewriterConfig proto.")

    rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
    if rewriter_config_template is None:
      # Layout optimizer may add Const nodes followed by Reshape nodes, thus we
      # need to run constant folding again.
      rewriter_config_with_trt.optimizers.extend(
          ["constfold", "layout", "constfold"])
      rewriter_config_with_trt.meta_optimizer_iterations = (
          rewriter_config_pb2.RewriterConfig.ONE)
    else:
      rewriter_config_with_trt.CopyFrom(rewriter_config_template)

    optimizer = rewriter_config_with_trt.custom_optimizers.add()
    optimizer.name = "TensorRTOptimizer"
    optimizer.parameter_map["minimum_segment_size"].i = minimum_segment_size
    optimizer.parameter_map["max_batch_size"].i = max_batch_size
    optimizer.parameter_map["is_dynamic_op"].b = is_dynamic_op
    optimizer.parameter_map[
        "max_workspace_size_bytes"].i = max_workspace_size_bytes
    optimizer.parameter_map["precision_mode"].s = _to_bytes(precision_mode)
    optimizer.parameter_map["maximum_cached_engines"].i = maximum_cached_engines
    if cached_engine_batches:
      optimizer.parameter_map["cached_engine_batches"].list.i.extend(
          cached_engine_batches)
    optimizer.parameter_map["use_calibration"].b = use_calibration
    optimizer.parameter_map["use_function_backup"].b = use_function_backup
    return rewriter_config_with_trt
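
A minimal usage sketch of wiring the returned proto into a session ConfigProto so Grappler runs the TensorRTOptimizer, assuming the method is exposed as a classmethod on TrtGraphConverter (as the cls parameter suggests); the parameter values are illustrative.

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.compiler.tensorrt import trt_convert as trt

rewriter_config = trt.TrtGraphConverter.get_tensorrt_rewriter_config(
    precision_mode=trt.TrtPrecisionMode.FP16,
    max_batch_size=8,
    is_dynamic_op=True,
    maximum_cached_engines=4)

# Grappler picks up the custom TensorRTOptimizer from the session config the
# next time the graph is optimized.
session_config = config_pb2.ConfigProto()
session_config.graph_options.rewrite_options.CopyFrom(rewriter_config)
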
Example #4
from tensorflow.python.saved_model import loader
from tensorflow.python.saved_model import save
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.training import saver
from tensorflow.python.training.tracking import tracking
from tensorflow.python.util.lazy_loader import LazyLoader

# Import TRT library. This is fine since we don't import TF-TRT in
# tensorflow/python/compiler/__init__.py, and `import tensorflow` won't trigger
# importing of TF-TRT. Note that TF-TRT is still included in GPU build since
# tensorflow/python/BUILD depends on it.
#
# We need this import so that when users import this module, they can execute a
# TRT-converted graph without calling any of the methods in this module.
trt_ops.load_trt_ops()

# Lazily load the op, since it's not available in cpu-only builds. Importing
# this at the top would cause tests that import TF-TRT to fail when they're
# built and run without CUDA/GPU.
gen_trt_ops = LazyLoader(
    "gen_trt_ops", globals(),
    "tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops")


def _to_bytes(s):
  """Encode s if it is a sequence of chars."""
  if isinstance(s, _six.text_type):
    return s.encode("utf-8", errors="surrogateescape")
  return s
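
A small illustration of the _to_bytes helper defined above: text is UTF-8 encoded, while bytes pass through unchanged, so either form of a precision-mode string can be written into a proto bytes field.

print(_to_bytes(u"FP16"))  # b'FP16' -- text is encoded with surrogateescape
print(_to_bytes(b"FP16"))  # b'FP16' -- bytes are returned unchanged
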
Example #5
  def __init__(self,
               input_saved_model_dir=None,
               input_saved_model_tags=None,
               input_saved_model_signature_key=None,
               input_graph_def=None,
               nodes_blacklist=None,
               session_config=None,
               max_batch_size=1,
               max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
               precision_mode=TrtPrecisionMode.FP32,
               minimum_segment_size=3,
               is_dynamic_op=False,
               maximum_cached_engines=1,
               cached_engine_batches=None,
               use_calibration=True,
               use_function_backup=True):
    """Initialize the converter.

    Args:
      input_saved_model_dir: the directory to load the SavedModel which contains
        the input graph to transform. Used only when input_graph_def is None.
      input_saved_model_tags: list of tags to load the SavedModel.
      input_saved_model_signature_key: the key of the signature to optimize the
        graph for.
      input_graph_def: a GraphDef object containing a model to be transformed.
        If set to None, the graph will be read from the SavedModel loaded from
        input_saved_model_dir.
      nodes_blacklist: list of node names to prevent the converter from
        touching. Only used when input_graph_def is not None.
      session_config: the ConfigProto used to create a Session. It's also used
        as a template to create a TRT-enabled ConfigProto for conversion. If not
        specified, a default ConfigProto will be used.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      cached_engine_batches: a list of batch sizes used to create cached
        engines, only used when is_dynamic_op is True. The length of the list
        should be <= maximum_cached_engines, and the dynamic TRT op will use
        this list to determine the batch sizes of the cached engines, instead of
        making the decision on the fly. This is useful when we know the most
        common batch size(s) the application is going to generate.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph using calib_graph_to_infer_graph() after running calibration. If
        set to False, quantization nodes will be expected for every tensor in
        the graph (excluding those which will be fused). If a range is missing,
        an error will occur. Please note that accuracy may be negatively
        affected if there is a mismatch between which tensors TRT quantizes and
        which tensors were trained with fake quantization.
      use_function_backup: if set to True, it will create a FunctionDef for each
        subgraph that is converted to TRT op, and if TRT ops fail to execute at
        runtime, it'll invoke that function as a fallback.

    Raises:
      ValueError: if the combination of the parameters is invalid.
      RuntimeError: if the TensorRT library version is incompatible.
    """
    super(TrtGraphConverter, self).__init__(
        input_saved_model_dir=input_saved_model_dir,
        input_saved_model_tags=input_saved_model_tags,
        input_saved_model_signature_key=input_saved_model_signature_key,
        input_graph_def=input_graph_def,
        nodes_blacklist=nodes_blacklist,
        session_config=session_config)

    # TODO(laigd): move all the validations below to
    # get_tensorrt_rewriter_config().

    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,line-too-long
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_loaded_tensorrt_version
    # pylint: enable=g-import-not-at-top,line-too-long

    # Check compatibility of TensorRT version.
    compiled_version = get_linked_tensorrt_version()
    loaded_version = get_loaded_tensorrt_version()
    version_mismatch = False
    if loaded_version[0] < compiled_version[0]:
      tf_logging.error(
          "TensorRT version mismatch. TensorFlow was compiled against " +
          "TensorRT %s but library loaded from environment is TensorRT %s" %
          (".".join([str(x) for x in compiled_version]),
           ".".join([str(x) for x in loaded_version])) +
          ". Please make sure that the correct version of TensorRT " +
          "is available in the system and added to ldconfig or LD_LIBRARY_PATH")
      raise RuntimeError("Incompatible TensorRT library version")
    for i in zip(loaded_version, compiled_version):
      if i[0] != i[1]:
        tf_logging.warn("TensorRT mismatch. Compiled against version " +
                        "%s, but loaded %s. Things may not work" %
                        (".".join([str(x) for x in compiled_version]),
                         ".".join([str(x) for x in loaded_version])))
        version_mismatch = True
        break
    if not version_mismatch:
      tf_logging.info("Running against TensorRT version %s" %
                      ".".join([str(x) for x in loaded_version]))

    # Check input arguments.
    if precision_mode not in TrtPrecisionMode.supported_precision_modes():
      raise ValueError(("precision mode '{}' is not supported. "
                        "It should be one of {}").format(
                            precision_mode,
                            TrtPrecisionMode.supported_precision_modes()))

    if cached_engine_batches:
      if not isinstance(cached_engine_batches, list):
        raise TypeError("cached_engine_batches should be a list.")
      if len(cached_engine_batches) > maximum_cached_engines:
        raise ValueError("cached_engine_batches should not contain more than "
                         "maximum_cached_engines items.")

    self._need_calibration = (
        precision_mode == TrtPrecisionMode.INT8 and use_calibration)
    self._use_function_backup = use_function_backup

    # TODO(laigd): consider providing a mechanism to remove the fallback path
    # after calibration is done.
    if self._need_calibration and not use_function_backup:
      raise ValueError(
          "Calibration requires enabling fallback to TF function execution.")

    # TODO(laigd):
    # - Get rid of is_dynamic_op option, it should always be True, and it should
    #   accept N shapes as input.
    # - Verify in int8 mode that maximum_cached_engines and
    #   cached_engine_batches are set appropriately.
    # - If it fails to build the int8 engine it should return an error.
    self._max_batch_size = max_batch_size
    self._max_workspace_size_bytes = max_workspace_size_bytes
    self._precision_mode = precision_mode
    self._minimum_segment_size = minimum_segment_size
    self._is_dynamic_op = is_dynamic_op
    self._maximum_cached_engines = maximum_cached_engines
    self._cached_engine_batches = cached_engine_batches
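
A minimal usage sketch of the basic non-calibration path through this constructor, assuming the converter also provides convert() and save() as in TF 1.x; the SavedModel paths are hypothetical.

from tensorflow.python.compiler.tensorrt import trt_convert as trt

converter = trt.TrtGraphConverter(
    input_saved_model_dir="/tmp/resnet_saved_model",  # hypothetical path
    max_batch_size=8,
    precision_mode=trt.TrtPrecisionMode.FP16,
    is_dynamic_op=True,
    maximum_cached_engines=4)
frozen_graph_def = converter.convert()                # GraphDef with TRTEngineOps
converter.save("/tmp/resnet_saved_model_trt")         # hypothetical output path
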
Example #6
  def get_tensorrt_rewriter_config(
      cls,
      rewriter_config_template=None,
      max_batch_size=1,
      max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
      precision_mode=TrtPrecisionMode.FP32,
      minimum_segment_size=3,
      is_dynamic_op=False,
      maximum_cached_engines=1,
      cached_engine_batches=None,
      use_calibration=True,
      use_function_backup=True):
    """Returns a RewriterConfig proto for TRT transformation.

    Args:
      rewriter_config_template: a template RewriterConfig proto used to create a
        TRT-enabled RewriterConfig. If None, it will use a default one.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      cached_engine_batches: a list of batch sizes used to create cached
        engines, only used when is_dynamic_op is True. The length of the list
        should be <= maximum_cached_engines, and the dynamic TRT op will use
        this list to determine the batch sizes of the cached engines, instead of
        making the decision on the fly. This is useful when we know the most
        common batch size(s) the application is going to generate.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph using calib_graph_to_infer_graph() after running calibration. If
        set to False, quantization nodes will be expected for every tensor in
        the graph (excluding those which will be fused). If a range is missing,
        an error will occur. Please note that accuracy may be negatively
        affected if there is a mismatch between which tensors TRT quantizes and
        which tensors were trained with fake quantization.
      use_function_backup: if set to True, it will create a FunctionDef for each
        subgraph that is converted to TRT op, and if TRT ops fail to execute at
        runtime, it'll invoke that function as a fallback.

    Returns:
      A RewriterConfig proto which sets a TensorRTOptimizer to run Grappler.

    Raises:
      TypeError: if any of the parameters are of unexpected type.
      ValueError: if any of the parameters are of unexpected value.
    """
    # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain
    # even if it cannot find TensorRT library.
    trt_ops.load_trt_ops()
    # pylint: disable=g-import-not-at-top,unused-import,line-too-long,unused-variable
    # Import a random symbol to trigger loading of TRT library.
    from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version
    # pylint: enable=g-import-not-at-top,unused-import,line-too-long,unused-variable

    if rewriter_config_template is not None and not isinstance(
        rewriter_config_template, rewriter_config_pb2.RewriterConfig):
      raise TypeError(
          "rewriter_config_template should be a RewriterConfig proto.")

    rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
    if rewriter_config_template is None:
      # Layout optimizer may add Const nodes followed by Reshape nodes, thus we
      # need to run constant folding again.
      rewriter_config_with_trt.optimizers.extend(
          ["constfold", "layout", "constfold"])
      rewriter_config_with_trt.meta_optimizer_iterations = (
          rewriter_config_pb2.RewriterConfig.ONE)
    else:
      rewriter_config_with_trt.CopyFrom(rewriter_config_template)

    optimizer = rewriter_config_with_trt.custom_optimizers.add()
    optimizer.name = "TensorRTOptimizer"
    optimizer.parameter_map["minimum_segment_size"].i = minimum_segment_size
    optimizer.parameter_map["max_batch_size"].i = max_batch_size
    optimizer.parameter_map["is_dynamic_op"].b = is_dynamic_op
    optimizer.parameter_map[
        "max_workspace_size_bytes"].i = max_workspace_size_bytes
    optimizer.parameter_map["precision_mode"].s = _to_bytes(precision_mode)
    optimizer.parameter_map["maximum_cached_engines"].i = maximum_cached_engines
    if cached_engine_batches:
      optimizer.parameter_map["cached_engine_batches"].list.i.extend(
          cached_engine_batches)
    optimizer.parameter_map["use_calibration"].b = use_calibration
    optimizer.parameter_map["use_function_backup"].b = use_function_backup
    return rewriter_config_with_trt
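
A minimal usage sketch of the rewriter_config_template path, again assuming the classmethod is reachable through TrtGraphConverter; the template contents are illustrative.

from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Start from a custom RewriterConfig; it is copied and the TensorRTOptimizer
# is appended to its custom_optimizers.
template = rewriter_config_pb2.RewriterConfig()
template.optimizers.extend(["constfold"])
template.meta_optimizer_iterations = rewriter_config_pb2.RewriterConfig.ONE

rewriter_config = trt.TrtGraphConverter.get_tensorrt_rewriter_config(
    rewriter_config_template=template,
    precision_mode=trt.TrtPrecisionMode.FP32,
    minimum_segment_size=5)
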