def GetConversionParams(self, run_params):
     """Return a TrtConversionParams for test."""
     batch_list = []
     for dims_list in self._GetParamsCached().input_dims:
         assert dims_list
         # Each list of shapes should have the same batch size.
         input_batches = [dims[0] for dims in dims_list]
         assert max(input_batches) == min(input_batches)
         batch_list.append(input_batches[0])
     conversion_params = trt_convert.TrtConversionParams(
         rewriter_config_template=None,
         max_workspace_size_bytes=1 << 25,
         precision_mode=run_params.precision_mode,
         minimum_segment_size=2,
         is_dynamic_op=run_params.dynamic_engine,
         maximum_cached_engines=1,
         use_calibration=run_params.use_calibration,
         use_function_backup=False,
         # We use the minimum of all the batch sizes, so when multiple
         # different input shapes are provided it'll always create new engines
         # in the cache, and we can therefore test the cache behavior.
         max_batch_size=min(batch_list))
     return conversion_params._replace(
         use_function_backup=IsQuantizationWithCalibration(
             conversion_params))
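The IsQuantizationWithCalibration helper called above is not shown in this snippet. A minimal sketch of what it would look like, assuming it only inspects the precision mode and the calibration flag:

def IsQuantizationWithCalibration(params):
    # Hypothetical reconstruction: a native-segment function backup is only
    # needed when the conversion runs INT8 quantization with calibration.
    return params.precision_mode == 'INT8' and params.use_calibration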
Example #2
  def _GetFunc(self, use_trt, model_dir, use_dynamic_shape):
    """Gets the mnist function.

    Args:
      use_trt: whether to use TF-TRT to convert the graph.
      model_dir: the directory from which to load the checkpoints.
      use_dynamic_shape: whether to run the TF-TRT conversion in dynamic shape
        mode.

    Returns:
      The mnist model function.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
      saved_model_dir = os.path.join(tmpdir, 'mnist')
      self._SaveModel(model_dir, saved_model_dir)

      if use_trt:
        conv_params = trt_convert.TrtConversionParams(
            precision_mode='FP16',
            minimum_segment_size=2,
            max_workspace_size_bytes=1 << 28,
            maximum_cached_engines=1)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=saved_model_dir,
            conversion_params=conv_params,
            use_dynamic_shape=use_dynamic_shape,
            dynamic_shape_profile_strategy='ImplicitBatchModeCompatible')
        converter.convert()
        func = converter._converted_func
      else:
        saved_model_loaded = saved_model_load(
            saved_model_dir, tags=[tag_constants.SERVING])
        func = saved_model_loaded.signatures[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    return func
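Accessing converter._converted_func above is a private shortcut. A hedged sketch of the supported route, reusing the imports this snippet already relies on (the output directory is a hypothetical placeholder), is to save the conversion and reload its serving signature:

        converter.save('/tmp/mnist_trt')  # hypothetical output directory
        loaded = saved_model_load('/tmp/mnist_trt', tags=[tag_constants.SERVING])
        func = loaded.signatures[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]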
Example #3
def export_serving_model(yolo, path, warmup_path=None, with_tensorrt=False):
    overwrite_path(path)
    tf.saved_model.save(yolo.yolo_model, path)
    if with_tensorrt:
        params = trt.TrtConversionParams(
            rewriter_config_template=None,
            max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=trt.TrtPrecisionMode.FP16,
            minimum_segment_size=3,
            is_dynamic_op=True,
            maximum_cached_engines=1,
            use_calibration=True,
            max_batch_size=1)
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=path,
                                            conversion_params=params)
        converter.convert()
        tf.io.gfile.rmtree(path)
        converter.save(path)
    asset_extra = os.path.join(path, "assets.extra")
    tf.io.gfile.mkdir(asset_extra)
    with tf.io.TFRecordWriter(
            os.path.join(asset_extra, "tf_serving_warmup_requests")) as writer:
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'detection'
        request.model_spec.signature_name = 'serving_default'
        if warmup_path is None:
            warmup_path = input('Please enter warm up image path:')
        with open(warmup_path, 'rb') as f:
            image = f.read()
        image_data = np.expand_dims(image, 0)
        request.inputs['predict_image'].CopyFrom(
            tf.compat.v1.make_tensor_proto(image_data))
        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())
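A hedged usage sketch; the model object, export path, and warmup image below are hypothetical placeholders:

# `yolo` is assumed to be an object exposing a Keras model as `yolo_model`,
# as implied by the function body above.
export_serving_model(yolo, 'serving/yolo/1',
                     warmup_path='warmup.jpg', with_tensorrt=True)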
Example #4
def convert_to_TFTRT(saved_model_path,
                     target_path,
                     max_workspace_size_bytes=1 << 22,
                     precision='FP16',
                     minimum_segment_size=3,
                     is_dynamic_op=True,
                     use_calibration=True,
                     max_batch_size=32,
                     calibration_input_fn=None):
    '''
    Args:
        precision: 'FP32', 'FP16' or 'INT8'.
        calibration_input_fn: required for INT8 calibration. A generator
            function that yields input data as a list or tuple, which will be
            used to execute the converted signature for calibration. All the
            returned input data should have the same shape.
    Source: https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html
    '''
    conversion_params = trt.TrtConversionParams(
        rewriter_config_template=None,
        maximum_cached_engines=1,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        use_calibration=use_calibration,
        max_batch_size=max_batch_size)
    converter = trt.TrtGraphConverterV2(input_saved_model_dir=saved_model_path,
                                        conversion_params=conversion_params)
    # Pass the calibration function through; the original snippet accepted it
    # but never used it, so INT8 conversion would fail.
    converter.convert(calibration_input_fn=calibration_input_fn
                      if precision == 'INT8' else None)
    converter.save(target_path)
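For INT8, the converter needs representative data. A minimal sketch of a calibration_input_fn matching the docstring's contract; the batch shape and paths are assumptions:

import numpy as np

def calibration_input_fn():
    # Yield a few batches of representative data; every yielded batch must
    # have the same shape (the [8, 224, 224, 3] shape is hypothetical).
    for _ in range(10):
        yield (np.random.random_sample([8, 224, 224, 3]).astype(np.float32),)

convert_to_TFTRT('saved_model_dir', 'trt_model_dir', precision='INT8',
                 calibration_input_fn=calibration_input_fn)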
Example #5
    def testTrtGraphConverter_ShapeOp_v2(self):
        """Test case for TrtGraphConverterV2 with ShapeOp."""
        if not is_tensorrt_enabled():
            return

        # TODO(b/185944425): enable the test for TRT before TRT 7.
        ver = get_linked_tensorrt_version()
        if ver[0] < 7:
            return

        class ShapeOpModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, None],
                                       dtype=dtypes.float32)
            ])
            def run(self, x):
                q = x + 1
                q_shape = array_ops.shape(q)
                return array_ops.identity(q_shape, name="output")

        np_input = np.random.random_sample([5, 3]).astype(np.float32)

        def _InputFunc():
            yield (np_input, )

        # Create the SavedModel.
        root = ShapeOpModel()
        expected_output = root.run(np_input)
        input_saved_model_dir = self.mkdtemp()
        save.save(root, input_saved_model_dir, signatures=root.run)

        # Convert the graph to TF-TRT.
        conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conv_params,
            use_dynamic_shape=True)
        converter.convert()

        # Build the graph with the input generator. This runs the TRTEngineOp native
        # segment.
        converter.build(_InputFunc)
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures["serving_default"]
        # Check that the graph is converted to one TRTEngineOp.
        self._CheckTrtOps(converted_signature)
        # Run the graph.
        output_with_trt = converted_signature(
            x=ops.convert_to_tensor(np_input))
        # Check the result of the run.
        self.assertAllClose(expected_output, list(output_with_trt.values())[0])
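    The test relies on a _CheckTrtOps helper that is not included in this snippet. A minimal sketch, assuming it counts TRTEngineOp nodes in the signature's graph, including nodes placed inside library functions:

    def _CheckTrtOps(self, concrete_func, num_engines=1):
        # Hypothetical reconstruction: collect op types from the graph and
        # its function library, then compare the TRTEngineOp count.
        graph_def = concrete_func.graph.as_graph_def()
        op_types = [node.op for node in graph_def.node]
        for library_func in graph_def.library.function:
            op_types.extend(node.op for node in library_func.node_def)
        self.assertEqual(num_engines, op_types.count('TRTEngineOp'))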
Example #6
    def testTrtGraphConverter_ShapeOp_Int32InputOutput_v2(self):
        """Tests ShapeOp with int32 values as engine input and output."""
        class ShapeOpModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, None],
                                       dtype=dtypes.float32)
            ])
            def run(self, x):
                q = x + 1
                q_shape = array_ops.shape(q)
                # Add an op that is not supported by TF-TRT. This allows TF-TRT to build
                # two engines. The first engine produces an int32 output and the second
                # engine has an int32 input and an int32 output.
                q = nn_ops.data_format_vec_permute(q_shape,
                                                   src_format="NHWC",
                                                   dst_format="NCHW")
                q = q * 2
                return array_ops.identity(q, name="output")

        np_input = np.random.random_sample([5, 3]).astype(np.float32)

        def _InputFunc():
            yield (np_input, )

        # Create the SavedModel.
        root = ShapeOpModel()
        expected_output = root.run(np_input)
        input_saved_model_dir = self.mkdtemp()
        save.save(root, input_saved_model_dir, signatures=root.run)

        # Convert the graph to TF-TRT.
        conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            use_dynamic_shape=True,
            **conv_params._asdict())
        converter.convert()

        # Build the graph with the input generator. This runs the TRTEngineOp native
        # segment.
        converter.build(_InputFunc)
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures["serving_default"]
        # Check that the graph is converted to two TRTEngineOps.
        self._CheckTrtOps(converted_signature, num_engines=2)
        # Run the graph.
        output_with_trt = converted_signature(
            x=ops.convert_to_tensor(np_input))
        # Check the result of the run.
        self.assertAllClose(expected_output, list(output_with_trt.values())[0])
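        Unlike the previous test, this one expands the parameters with **conv_params._asdict() instead of passing conversion_params. Since TrtConversionParams is a namedtuple, _asdict() is the standard way to turn it into keyword arguments. A hedged illustration of the equivalence, assuming a TF version where both spellings are accepted:

        # The two constructions below pass the same parameters; the keyword
        # form is the one newer TrtGraphConverterV2 versions prefer.
        converter_a = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conv_params)
        converter_b = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            **conv_params._asdict())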
Example #7
def main(args):

    params = trt.TrtConversionParams(precision_mode="FP16",
                                     use_calibration=False)

    converter = trt.TrtGraphConverterV2(input_saved_model_dir=args.saved_model,
                                        conversion_params=params)
    converter.convert()
    converter.build(representative_dataset_gen)
    converter.save(args.saved_model_trt)
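representative_dataset_gen is not defined in this snippet. A minimal sketch, assuming a generator of fixed-shape batches (the shape is hypothetical):

import numpy as np

def representative_dataset_gen():
    # Yield a handful of input batches so converter.build() can pre-build
    # engines for the corresponding shapes.
    for _ in range(8):
        yield (np.random.random_sample([1, 224, 224, 3]).astype(np.float32),)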
Example #8
  def GetConversionParams(self, run_params):
    """Returns a TrtConversionParams for test."""
    conversion_params = trt_convert.TrtConversionParams(
        max_workspace_size_bytes=1 << 25,
        precision_mode=run_params.precision_mode,
        minimum_segment_size=2,
        maximum_cached_engines=1,
        use_calibration=run_params.use_calibration)
    return conversion_params
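A hedged sketch of how the returned parameters would be consumed; `test`, `run_params`, and the SavedModel directory are assumptions:

# Hypothetical wiring inside a test: feed the params into the V2 converter.
params = test.GetConversionParams(run_params)
converter = trt_convert.TrtGraphConverterV2(
    input_saved_model_dir=saved_model_dir,  # assumed to exist
    conversion_params=params)
converter.convert()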
Example #9
def export_model(model_dir, prec, tf_trt_model_dir=None):
    model = tf.keras.models.load_model(
        os.path.join(model_dir, f'saved_model_{prec}'))
    input_shape = [1, 572, 572, 1]
    dummy_input = tf.constant(
        tf.zeros(input_shape,
                 dtype=tf.float32 if prec == "fp32" else tf.float16))
    _ = model(dummy_input, training=False)

    trt_prec = trt.TrtPrecisionMode.FP32 if prec == "fp32" else trt.TrtPrecisionMode.FP16
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=os.path.join(model_dir, f'saved_model_{prec}'),
        conversion_params=trt.TrtConversionParams(precision_mode=trt_prec),
    )
    converter.convert()
    tf_trt_model_dir = tf_trt_model_dir or f'/tmp/tf-trt_model_{prec}'
    converter.save(tf_trt_model_dir)
    print(f"TF-TRT model saved at {tf_trt_model_dir}")
Example #10
  def _GetFunc(self, use_trt, model_dir, use_dynamic_shape):
    """Gets the mnist function.

    Args:
      use_trt: whether to use TF-TRT to convert the graph.
      model_dir: the directory from which to load the checkpoints.
      use_dynamic_shape: whether to run the TF-TRT conversion in dynamic shape
        mode.

    Returns:
      The mnist model function.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
      saved_model_dir = os.path.join(tmpdir, 'mnist')
      self._SaveModel(model_dir, saved_model_dir)

      if use_trt:
        conv_params = trt_convert.TrtConversionParams(
            precision_mode='FP16',
            minimum_segment_size=2,
            max_workspace_size_bytes=(
                trt_convert.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES),
            maximum_cached_engines=1)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=saved_model_dir,
            use_dynamic_shape=use_dynamic_shape,
            dynamic_shape_profile_strategy='ImplicitBatchModeCompatible',
            **conv_params._asdict())
        converter.convert()
        try:
          line_length = max(160, os.get_terminal_size().columns)
        except OSError:
          line_length = 160
        converter.summary(line_length=line_length, detailed=True)
        func = converter._converted_func
      else:
        saved_model_loaded = saved_model_load(
            saved_model_dir, tags=[tag_constants.SERVING])
        func = saved_model_loaded.signatures[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    return func
Example #11
def input_fn():
    import imagenet
    train_ds = imagenet.load_ds(
        2 * (args.input, )).take(192).map(lambda x, y: tf.cast(x, 'uint8'))
    train_ds = train_ds.batch(64)
    data_len = 50000 // 64
    count = 0
    print('Training:')
    for batch in train_ds:
        print('\r%d/%d' % (count, data_len), end='')
        count += 1
        yield (batch, )


params = trt.TrtConversionParams(rewriter_config_template=None,
                                 max_workspace_size_bytes=1 << 30,
                                 precision_mode=args.mode,
                                 minimum_segment_size=3,
                                 is_dynamic_op=True,
                                 maximum_cached_engines=1,
                                 use_calibration=True,
                                 max_batch_size=64,
                                 allow_build_at_runtime=True)

converter = trt.TrtGraphConverterV2(input_saved_model_dir=args.mod_path,
                                    conversion_params=params)
calib_input = input_fn if args.mode == 'INT8' else None
converter.convert(calib_input)
if args.build:
    converter.build(input_fn=input_fn)
converter.save(args.out_path)
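The script references an `args` namespace that is not shown. A minimal sketch of the argument parsing it implies, with flag names inferred from the attributes used above:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mod_path', required=True)   # input SavedModel dir
parser.add_argument('--out_path', required=True)   # output SavedModel dir
parser.add_argument('--input', required=True)      # imagenet data path
parser.add_argument('--mode', default='FP16')      # FP32 | FP16 | INT8
parser.add_argument('--build', action='store_true')
args = parser.parse_args()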