Example #1
def save_trt():

    if FLAGS.quantize_mode == 'int8':
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.INT8,
            max_workspace_size_bytes=4000000000,
            use_calibration=True,
            max_batch_size=8)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=FLAGS.weights,
            conversion_params=conversion_params)
        converter.convert(calibration_input_fn=representative_data_gen)
    elif FLAGS.quantize_mode == 'float16':
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.FP16,
            max_workspace_size_bytes=4000000000,
            max_batch_size=8)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=FLAGS.weights,
            conversion_params=conversion_params)
        converter.convert()
    else:
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.FP32,
            max_workspace_size_bytes=4000000000,
            max_batch_size=8)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=FLAGS.weights,
            conversion_params=conversion_params)
        converter.convert()

    # converter.build(input_fn=representative_data_gen)
    converter.save(output_saved_model_dir=FLAGS.output)
    print('Done Converting to TF-TRT')

    saved_model_loaded = tf.saved_model.load(FLAGS.output)
    graph_func = saved_model_loaded.signatures[
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    trt_graph = graph_func.graph.as_graph_def()
    for n in trt_graph.node:
        print(n.op)
        if n.op == "TRTEngineOp":
            print("Node: %s, %s" % (n.op, n.name.replace("/", "_")))
        else:
            print("Exclude Node: %s, %s" % (n.op, n.name.replace("/", "_")))
    logging.info("model saved to: {}".format(FLAGS.output))

    trt_engine_nodes = len(
        [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
    print("numb. of trt_engine_nodes in TensorRT graph:", trt_engine_nodes)
    all_nodes = len([1 for n in trt_graph.node])
    print("numb. of all_nodes in TensorRT graph:", all_nodes)
Example #2
  def _GetFunc(self, use_trt, model_dir, use_dynamic_shape):
    """Gets the mnist function.

    Args:
      use_trt: whether use TF-TRT to convert the graph.
      model_dir: the model directory to load the checkpoints.
      use_dynamic_shape: whether to run the TF-TRT conversion in dynamic shape
        mode.

    Returns:
      The mnist model function.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
      saved_model_dir = os.path.join(tmpdir, 'mnist')
      self._SaveModel(model_dir, saved_model_dir)

      if use_trt:
        conv_params = trt_convert.TrtConversionParams(
            precision_mode='FP16',
            minimum_segment_size=2,
            max_workspace_size_bytes=1 << 28,
            maximum_cached_engines=1)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=saved_model_dir,
            conversion_params=conv_params,
            use_dynamic_shape=use_dynamic_shape,
            dynamic_shape_profile_strategy='ImplicitBatchModeCompatible')
        converter.convert()
        func = converter._converted_func
      else:
        saved_model_loaded = saved_model_load(
            saved_model_dir, tags=[tag_constants.SERVING])
        func = saved_model_loaded.signatures[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    return func
Example #3
def GenerateModelV2(tf_saved_model_dir, tftrt_saved_model_dir):
    """Generate and convert a model using TFv2 API."""
    class SimpleModel(tracking.AutoTrackable):
        """Define model with a TF function."""
        def __init__(self):
            self.v = None

        @def_function.function(input_signature=[
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32),
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32)
        ])
        def run(self, input1, input2):
            if self.v is None:
                self.v = variables.Variable([[[1.0]]], dtype=dtypes.float32)
            return GetGraph(input1, input2, self.v)

    root = SimpleModel()

    # Saved TF model
    save(root, tf_saved_model_dir,
         {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: root.run})

    # Convert TF model to TensorRT
    converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=tf_saved_model_dir)
    converter.convert()
    converter.save(tftrt_saved_model_dir)
Example #4
def export_serving_model(yolo, path, warmup_path=None, with_tensorrt=False):
    overwrite_path(path)
    tf.saved_model.save(yolo.yolo_model, path)
    if with_tensorrt:
        params = trt.TrtConversionParams(
            rewriter_config_template=None,
            max_workspace_size_bytes=trt.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=trt.TrtPrecisionMode.FP16,
            minimum_segment_size=3,
            is_dynamic_op=True,
            maximum_cached_engines=1,
            use_calibration=True,
            max_batch_size=1)
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=path,
                                            conversion_params=params)
        converter.convert()
        tf.io.gfile.rmtree(path)
        converter.save(path)
    asset_extra = os.path.join(path, "assets.extra")
    tf.io.gfile.mkdir(asset_extra)
    with tf.io.TFRecordWriter(
            os.path.join(asset_extra, "tf_serving_warmup_requests")) as writer:
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'detection'
        request.model_spec.signature_name = 'serving_default'
        if warmup_path is None:
            warmup_path = input('Please enter warm up image path:')
        image = open(warmup_path, 'rb').read()
        image_data = np.expand_dims(image, 0)
        request.inputs['predict_image'].CopyFrom(
            tf.compat.v1.make_tensor_proto(image_data))
        log = prediction_log_pb2.PredictionLog(
            predict_log=prediction_log_pb2.PredictLog(request=request))
        writer.write(log.SerializeToString())
Example #5
def convert_to_TFTRT(saved_model_path,
                     target_path,
                     max_workspace_size_bytes=1 << 22,
                     precision='FP16',
                     minimum_segment_size=3,
                     is_dynamic_op=True,
                     use_calibration=True,
                     max_batch_size=32,
                     calibration_input_fn=None):
    '''
    Args:
        precision: 'FP32', 'FP16' or 'INT8'
        calibration_input_fn: required for INT8 calibration. A generator function that yields input data
            as a list or tuple, which will be used to execute the converted signature for
            calibration. All the returned input data should have the same shape.
    Source: https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html
    '''
    conversion_params = trt.TrtConversionParams(
        rewriter_config_template=None,
        maximum_cached_engines=1,
        max_workspace_size_bytes=max_workspace_size_bytes,
        precision_mode=precision,
        minimum_segment_size=minimum_segment_size,
        is_dynamic_op=is_dynamic_op,
        use_calibration=use_calibration,
        max_batch_size=max_batch_size)
    converter = trt.TrtGraphConverterV2(input_saved_model_dir=saved_model_path,
                                        conversion_params=conversion_params)
    if precision == 'INT8':
        converter.convert(calibration_input_fn=calibration_input_fn)
    else:
        converter.convert()
    converter.save(target_path)
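
A usage sketch for the helper above (the paths and workspace size are illustrative, not part of the original):

convert_to_TFTRT('./saved_model', './saved_model_tftrt_fp16',
                 max_workspace_size_bytes=1 << 30,
                 precision='FP16')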
Example #6
 def convertModel(self):
     converter = trt.TrtGraphConverterV2(input_saved_model_dir=self._model_path, conversion_params=self._params)
     converter.convert()
     converter.build(self.gen_fn)
     converter.save(self._model_save_path)
     self._converted = True
     return 
Example #7
def get_rt_model(modelpath, savepath, input_fn):
    params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
        precision_mode='FP16',
        is_dynamic_op=True,
        max_workspace_size_bytes=4000000000,
        max_batch_size=8)
    converter = trt.TrtGraphConverterV2(input_saved_model_dir=modelpath,
                                        conversion_params=params)
    converter.convert()
    converter.build(input_fn)
    converter.save(savepath)
    return
Example #8
def load_and_convert(path, precision):
    """ Load a saved model and convert it to FP32 or FP16. Return a converter """

    params = copy.deepcopy(trt.DEFAULT_TRT_CONVERSION_PARAMS)

    params = params._replace(
        precision_mode=(trt.TrtPrecisionMode.FP16 if precision.lower()
                        == "fp16" else trt.TrtPrecisionMode.FP32),
        max_batch_size=128,
        max_workspace_size_bytes=2 << 32,  # 8,589,934,592 bytes
        maximum_cached_engines=100,
        minimum_segment_size=3,
        is_dynamic_op=True,
        allow_build_at_runtime=True)

    import pprint
    print("%" * 85)
    pprint.pprint(params)
    print("%" * 85)

    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=path,
        conversion_params=params,
    )

    return converter
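
load_and_convert only builds the converter; a sketch of the assumed calling code that actually converts and saves the model:

converter = load_and_convert('./saved_model', precision='fp16')  # hypothetical path
converter.convert()                     # run the TF-TRT conversion
converter.save('./saved_model_tftrt')   # write the converted SavedModel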
Example #9
def load_with_converter(path, precision, batch_size):
    """Loads a saved model using a TF-TRT converter, and returns the converter
    """

    params = copy.deepcopy(trt.DEFAULT_TRT_CONVERSION_PARAMS)
    if precision == 'int8':
        precision_mode = trt.TrtPrecisionMode.INT8
    elif precision == 'fp16':
        precision_mode = trt.TrtPrecisionMode.FP16
    else:
        precision_mode = trt.TrtPrecisionMode.FP32

    params = params._replace(
        precision_mode=precision_mode,
        max_batch_size=batch_size,
        max_workspace_size_bytes=2 << 32,  # 8,589,934,592 bytes
        maximum_cached_engines=100,
        minimum_segment_size=3,
        allow_build_at_runtime=True
    )

    import pprint
    print("%" * 85)
    pprint.pprint(params)
    print("%" * 85)

    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=path,
        conversion_params=params,
    )

    return converter
Example #10
def convert(model_path, output_path, precision, engine):
    if precision == 'fp16':
        precision_mode = trt.TrtPrecisionMode.FP16
    elif precision == 'int8':
        precision_mode = trt.TrtPrecisionMode.INT8
    else:
        precision_mode = trt.TrtPrecisionMode.FP32

    if engine:
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=precision_mode,
            max_workspace_size_bytes=8000000000)
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=model_path,
                                            conversion_params=conversion_params)

        converter.convert()
        converter.save(output_saved_model_dir=output_path)


@click.group()
def cli():
    pass

if __name__ == '__main__':
    cli.add_command(benchmark)
    cli.add_command(benchmark_trt)
    cli.add_command(convert)
    cli()
Example #11
def GenerateModelV2(tf_saved_model_dir, tftrt_saved_model_dir):
    """Generate and convert a model using TFv2 API."""
    class SimpleModel(tracking.AutoTrackable):
        """Define model with a TF function."""
        def __init__(self):
            self.v = None

        @def_function.function(input_signature=[
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32),
            tensor_spec.TensorSpec(shape=[None, 1, 1], dtype=dtypes.float32)
        ])
        def run(self, input1, input2):
            if self.v is None:
                self.v = variables.Variable([[[1.0]]], dtype=dtypes.float32)
            return GetGraph(input1, input2, self.v)

    root = SimpleModel()

    # Saved TF model
    # pylint: disable=not-callable
    save(root, tf_saved_model_dir,
         {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: root.run})

    # Convert TF model to TensorRT
    converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=tf_saved_model_dir)
    converter.convert()
    try:
        line_length = max(160, os.get_terminal_size().columns)
    except OSError:
        line_length = 160
    converter.summary(line_length=line_length, detailed=True)
    converter.save(tftrt_saved_model_dir)
Example #12
 def _create_converter(self, trt_convert_params: trt.TrtConversionParams):
     return trt.TrtGraphConverterV2(
         input_saved_model_dir=self.model_config.saved_model_dir,
         input_saved_model_tags=self.model_config.saved_model_tags,
         input_saved_model_signature_key=(
             self.model_config.saved_model_signature_key),
         conversion_params=trt_convert_params)
Example #13
def export():
    """Load the model and export it in one of the format specified by FLAGS.export_format
    IMPORTANT : the export with TensorRT is still work in progress inside Tensorflow. It doesn't work with the 2.0a official version but does
    with a version compiled from master. It will probably evolve in a near futur
    """
    model = load_model()
    logging.info("Model loaded. Summary:")
    model.summary()
    logging.info("Last ops: {}".format(
        [node.op.name for node in model.outputs]))

    if FLAGS.export_format == "saved_model" or FLAGS.export_format == "tensorrt":
        saved_model_path = os.path.join(FLAGS.export_path, "saved_model")
        tf.saved_model.save(model, saved_model_path)
        if FLAGS.export_format == "tensorrt":
            # Convert the SavedModel using TF-TRT, see https://github.com/aaroey/tensorflow/blob/tftrt20/tftrt20/test.py
            conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
                use_function_backup=False,
                precision_mode=FLAGS.trt_precision_mode)
            converter = trt.TrtGraphConverterV2(
                input_saved_model_dir=saved_model_path,
                conversion_params=conversion_params)
            converter.convert()
            converter.save(os.path.join(FLAGS.export_path, "trt"))
    elif FLAGS.export_format == "tflite":
        concrete_func = tf.function(
            lambda x: model(x, training=False)).get_concrete_function(
                x=tf.TensorSpec((1, 480, 640, 3), tf.float32))
        converter = tf.lite.TFLiteConverter.from_concrete_function(
            concrete_func)
        tflite_model = converter.convert()
        open(FLAGS.export_path, "wb").write(tflite_model)
Example #14
    def convert(self,
                output_saved_model_dir,
                precision="FP32",
                max_workspace_size_bytes=8000000000,
                **kwargs):

        if precision == "INT8" and self.calibration_data is None:
            raise Exception("No calibration data set!")

        trt_precision = precision_dict[precision]
        conversion_params = tf_trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt_precision,
            max_workspace_size_bytes=max_workspace_size_bytes,
            use_calibration=precision == "INT8")
        converter = tf_trt.TrtGraphConverterV2(
            input_saved_model_dir=self.input_saved_model_dir,
            conversion_params=conversion_params)

        if precision == "INT8":
            converter.convert(calibration_input_fn=self.calibration_data)
        else:
            converter.convert()

        converter.save(output_saved_model_dir=output_saved_model_dir)

        return OptimizedModel(output_saved_model_dir)
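
The method looks up trt_precision in a precision_dict defined elsewhere in this module; a plausible definition, assuming it simply maps the precision strings to TrtPrecisionMode values:

precision_dict = {
    "FP32": tf_trt.TrtPrecisionMode.FP32,
    "FP16": tf_trt.TrtPrecisionMode.FP16,
    "INT8": tf_trt.TrtPrecisionMode.INT8,
}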
Example #15
def convert(model_path, output_path, tf1, precision, max_workspace_size,
            min_segment_size, saved_model_tags, build, batch_shape):
    if not tf1:
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            max_workspace_size_bytes=max_workspace_size,
            precision_mode=precision,
            minimum_segment_size=min_segment_size)
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            input_saved_model_tags=saved_model_tags,
            conversion_params=params)
        try:
            converter.convert()
        except Exception as e:
            raise RuntimeError('{}. Just try passing "--tf1".'.format(e))
        if build or batch_shape[0]:

            def reference_data_gen():
                inp1 = tf.random.normal(shape=batch_shape, dtype=tf.float32)
                inp2 = tf.random.normal(shape=batch_shape, dtype=tf.float32)
                yield (inp1, inp2)

            converter.build(reference_data_gen)
        converter.save(output_saved_model_dir=output_path)
    else:
        trt.create_inference_graph(None,
                                   None,
                                   max_batch_size=1,
                                   max_workspace_size_bytes=max_workspace_size,
                                   precision_mode=precision,
                                   minimum_segment_size=min_segment_size,
                                   is_dynamic_op=True,
                                   input_saved_model_dir=model_path,
                                   input_saved_model_tags=saved_model_tags,
                                   output_saved_model_dir=output_path)
Example #16
 def _CreateConverterV2(self, input_saved_model_dir):
     return trt_convert.TrtGraphConverterV2(
         input_saved_model_dir=input_saved_model_dir,
         input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
         conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS.
         _replace(precision_mode=trt_convert.TrtPrecisionMode.FP32,
                  is_dynamic_op=True,
                  maximum_cached_engines=2))
Example #17
def get_graph_func(
    input_saved_model_dir,
    input_size,
    output_saved_model_dir=None,
    conversion_params=trt.DEFAULT_TRT_CONVERSION_PARAMS,
    use_trt=False,
    calib_files=None,
    num_calib_inputs=None,
    batch_size=None,
    optimize_offline=False,
):
    """Retreives a frozen SavedModel and applies TF-TRT
    use_trt: bool, if true use TensorRT
    precision: str, floating point precision (FP32, FP16, or INT8)
    batch_size: int, batch size for TensorRT optimizations
    returns: TF function that is ready to run for inference
    """
    start_time = time.time()
    graph_func = get_func_from_saved_model(input_saved_model_dir)
    if use_trt:
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conversion_params,
        )

        def input_fn(input_files, num_iterations):
            dataset = get_dataset(
                data_files=input_files,
                batch_size=batch_size,
                input_size=input_size,
                mode="validation",
            )
            for i, batch_images in enumerate(dataset):
                if i >= num_iterations:
                    break
                yield (batch_images, )
                print("  step %d/%d" % (i + 1, num_iterations))

        if conversion_params.precision_mode != "INT8":
            print("Graph conversion...")
            converter.convert()
            if optimize_offline:
                print("Building TensorRT engines...")
                converter.build(input_fn=partial(input_fn, data_files, 1))
            converter.save(output_saved_model_dir=output_saved_model_dir)
            graph_func = get_func_from_saved_model(output_saved_model_dir)
        else:
            print("Graph conversion and INT8 calibration...")
            converter.convert(calibration_input_fn=partial(
                input_fn, calib_files, num_calib_inputs // batch_size))
            if optimize_offline:
                print("Building TensorRT engines...")
                converter.build(input_fn=partial(input_fn, data_files, 1))
            converter.save(output_saved_model_dir=output_saved_model_dir)
            graph_func = get_func_from_saved_model(output_saved_model_dir)
    return graph_func, {"conversion": time.time() - start_time}
Example #18
    def convert_to_TF_TRT_graph_and_save(self):

        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=self.__precision_mode,
            max_workspace_size_bytes=self.__max_workspacesize_byte,
            minimum_segment_size=self.__min_seg_size,
            use_calibration=self.__precision_mode == 'INT8')

        start_time = time.time()
        # freezed_model = self.__model_class.load_saved_model_for_inference()
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=self.__path_to_model,
            conversion_params=conversion_params)

        def input_fn():
            num_iterations = 100
            dataset, image_ids = utils.load_img_from_folder_update(
                self.__val_data_dir,
                self.__annotation_file,
                self.__batch_size,
                self.__input_size,
                dtype=self.__model_input_type)
            for i, batch_image in enumerate(dataset):
                if i >= num_iterations:
                    break
                yield (batch_image, )
                print(" Calibration / builded  step %d/%d" %
                      (i + 1, num_iterations))
                i += 1

        click.echo(
            click.style(f"\n Using precision mode: {self.__precision_mode}\n",
                        bold=True,
                        fg='green'))

        if self.__precision_mode == trt.TrtPrecisionMode.INT8:
            converter.convert(calibration_input_fn=input_fn)
        else:
            converter.convert()

        if self.__build_engine:
            # Build a TensorRT engine for each subgraph with more than min_seg_size nodes
            click.echo(
                click.style("\n Build TensorRT Engine...\n",
                            bold=True,
                            fg='green'))
            converter.build(input_fn=input_fn)

        click.echo(
            click.style(f"\n Saving {self.__model_name} \n",
                        bold=True,
                        fg='green'))
        converter.save(output_saved_model_dir=self.__output_saved_model_dir)
        end_time = time.time()
        click.echo(click.style(f"\n Complet \n", bold=True, fg='green'))

        return self.__converted_model_name, self.__output_saved_model_dir, end_time - start_time
Example #19
def get_graph_func(input_saved_model_dir,
                   preprocess_method,
                   input_size,
                   output_saved_model_dir=None,
                   conversion_params=trt.DEFAULT_TRT_CONVERSION_PARAMS,
                   use_trt=False,
                   calib_files=None,
                   num_calib_inputs=None,
                   use_synthetic=False,
                   batch_size=None,
                   optimize_offline=False):
  """Retreives a frozen SavedModel and applies TF-TRT
  use_trt: bool, if true use TensorRT
  precision: str, floating point precision (FP32, FP16, or INT8)
  batch_size: int, batch size for TensorRT optimizations
  returns: TF function that is ready to run for inference
  """
  start_time = time.time()
  graph_func = get_func_from_saved_model(input_saved_model_dir)
  if use_trt:
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=input_saved_model_dir,
        conversion_params=conversion_params,
    )
    def input_fn(input_files, num_iterations):
      dataset = get_dataset(data_files=input_files,
                            batch_size=batch_size,
                            use_synthetic=False,
                            preprocess_method=preprocess_method,
                            input_size=input_size,
                            mode='validation')
      for i, (batch_images, _) in enumerate(dataset):
        if i >= num_iterations:
          break
        yield (batch_images,)
        print("  step %d/%d" % (i+1, num_iterations))
    if conversion_params.precision_mode != 'INT8':
      print('Graph conversion...')
      converter.convert()
      if optimize_offline:
        print('Building TensorRT engines...')
        print("ANDYYYYY WE SHOULD NOT SEE THIS!!!!")
        converter.build(input_fn=partial(input_fn, data_files, 1))
      converter.save(output_saved_model_dir=output_saved_model_dir)
      graph_func = get_func_from_saved_model(output_saved_model_dir)
    else:
      print('Graph conversion and INT8 calibration...')
      converter.convert(calibration_input_fn=partial(
          input_fn, calib_files, num_calib_inputs//batch_size))
      if optimize_offline:
        print('Building TensorRT engines...')
        print("INSTANCE 2 OF WE SHOULD NOT SEE THIS")
        converter.build(input_fn=partial(input_fn, data_files, 1))
      converter.save(output_saved_model_dir=output_saved_model_dir)
      graph_func = get_func_from_saved_model(output_saved_model_dir)
  return graph_func, {'conversion': time.time() - start_time}
Example #20
    def testTrtGraphConverter_ShapeOp_v2(self):
        """Test case for TrtGraphConverterV2 with ShapeOp."""
        if not is_tensorrt_enabled():
            return

        # TODO(b/185944425): enable the test for TRT before TRT 7.
        ver = get_linked_tensorrt_version()
        if ver[0] < 7:
            return

        class ShapeOpModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, None],
                                       dtype=dtypes.float32)
            ])
            def run(self, x):
                q = x + 1
                q_shape = array_ops.shape(q)
                return array_ops.identity(q_shape, name="output")

        np_input = np.random.random_sample([5, 3]).astype(np.float32)

        def _InputFunc():
            yield (np_input, )

        # Create the SavedModel.
        root = ShapeOpModel()
        expected_output = root.run(np_input)
        input_saved_model_dir = self.mkdtemp()
        save.save(root, input_saved_model_dir, signatures=root.run)

        # Convert the graph to TF-TRT.
        conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conv_params,
            use_dynamic_shape=True)
        converter.convert()

        # Build the graph with the input generator. This runs the TRTEngineOp native
        # segment.
        converter.build(_InputFunc)
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures["serving_default"]
        # Check that the graph is converted to one TRTEngineOp.
        self._CheckTrtOps(converted_signature)
        # Run the graph.
        output_with_trt = converted_signature(
            x=ops.convert_to_tensor(np_input))
        # Check the result of the run.
        self.assertAllClose(expected_output, list(output_with_trt.values())[0])
Example #21
def convert():
    if precision not in ('FP32', 'FP16', 'INT8'):
        raise ValueError('precision must be FP32, FP16 or INT8')

    if precision == 'FP32':
        print('Converting to TF-TRT FP32...')
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.FP32,
            max_workspace_size_bytes=8000000000)

        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            conversion_params=conversion_params)
        converter.convert()
        converter.save(output_saved_model_dir='saved_model_TFTRT_FP32')
        print('Done Converting to TF-TRT FP32')

    if precision == 'FP16':
        print('Converting to TF-TRT FP16...')
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.FP16,
            max_workspace_size_bytes=8000000000)

        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            conversion_params=conversion_params)
        converter.convert()
        converter.save(output_saved_model_dir='saved_model_TFTRT_FP16')
        print('Done Converting to TF-TRT FP16')

    if precision == 'INT8':
        print('Converting to TF-TRT INT8...')
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.INT8,
            max_workspace_size_bytes=8000000000,
            use_calibration=True)

        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=model_path,
            conversion_params=conversion_params)
        # need to calibrate representative data
        converter.convert(calibration_input_fn=calibrate_data_gen)
        converter.save(output_saved_model_dir='saved_model_TFTRT_INT8')
        print('Done Converting to TF-TRT INT8')
Example #22
    def testTrtGraphConverter_ShapeOp_Int32InputOutput_v2(self):
        """Testing ShapeOp and int32 values as engine input and outpu."""
        class ShapeOpModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, None],
                                       dtype=dtypes.float32)
            ])
            def run(self, x):
                q = x + 1
                q_shape = array_ops.shape(q)
                # Add an op that is not supported by TF-TRT. This allows TF-TRT to build
                # two engines. The first engine produces an int32 output and the second
                # engine has an int32 input and an int32 output.
                q = nn_ops.data_format_vec_permute(q_shape,
                                                   src_format="NHWC",
                                                   dst_format="NCHW")
                q = q * 2
                return array_ops.identity(q, name="output")

        np_input = np.random.random_sample([5, 3]).astype(np.float32)

        def _InputFunc():
            yield (np_input, )

        # Create the SavedModel.
        root = ShapeOpModel()
        expected_output = root.run(np_input)
        input_saved_model_dir = self.mkdtemp()
        save.save(root, input_saved_model_dir, signatures=root.run)

        # Convert the graph to TF-TRT.
        conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            use_dynamic_shape=True,
            **conv_params._asdict())
        converter.convert()

        # Build the graph with the input generator. This runs the TRTEngineOp native
        # segment.
        converter.build(_InputFunc)
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures["serving_default"]
        # Check that the graph is converted to two TRTEngineOps.
        self._CheckTrtOps(converted_signature, num_engines=2)
        # Run the graph.
        output_with_trt = converted_signature(
            x=ops.convert_to_tensor(np_input))
        # Check the result of the run.
        self.assertAllClose(expected_output, list(output_with_trt.values())[0])
Example #23
def main(args):

    params = trt.TrtConversionParams(precision_mode="FP16",
                                     use_calibration=False)

    converter = trt.TrtGraphConverterV2(input_saved_model_dir=args.saved_model,
                                        conversion_params=params)
    converter.convert()
    converter.build(representative_dataset_gen)
    converter.save(args.saved_model_trt)
Example #24
        def _GetModelPaths(model_class):
            input_saved_model_dir = self.mkdtemp()
            root = model_class()
            save.save(root, input_saved_model_dir)

            converter = trt_convert.TrtGraphConverterV2(
                input_saved_model_dir=input_saved_model_dir)
            converter.convert()
            output_saved_model_dir = self.mkdtemp()
            converter.save(output_saved_model_dir)
            return input_saved_model_dir, output_saved_model_dir
Example #25
 def convertModel(self):
     converter = trt.TrtGraphConverterV2(
         input_saved_model_dir=self._model_path,
         conversion_params=self._params)
     if self._use_calibration_fn:
         converter.convert(calibration_input_fn=self.callibration_fn)
     else:
         converter.convert()
     converter.build(self.gen_fn)
     converter.save(self._model_save_path)
     self._converted = True
     return
Example #26
def compile_source(source: SavedModelFile, config: Config) -> TfTRTSavedModel:
    meta_graph_def = saved_model_utils.get_meta_graph_def(source.model_path, tag_constants.SERVING)
    signature_def_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    if config.signature_keys:
        signature_def_key = config.signature_keys

    input_length = _get_saved_model_file_inputs(meta_graph_def, signature_def_key)
    input_data_formats = config.input_formats
    if input_data_formats is None:
        input_data_formats = [None for _ in range(input_length)]
    else:
        input_formats_length = len(input_data_formats)

        if input_formats_length != input_length:
            raise ValueError(
                f'Number of input formats ({input_formats_length}) does not match number of inputs ({input_length})'
            )

    precision = 'FP32'

    if config.enable_fp16:
        precision = 'FP16'

    if config.enable_int8:
        precision = 'INT8'

    params = _get_trt_conversion_params(precision_mode=precision)

    input_saved_model_signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    if config.signature_keys:
        input_saved_model_signature_key = config.signature_keys

    converter = trt.TrtGraphConverterV2(input_saved_model_dir=source.model_path,
                                        input_saved_model_tags=tag_constants.SERVING,
                                        input_saved_model_signature_key=input_saved_model_signature_key,
                                        conversion_params=params)

    if config.enable_int8:
        converter.convert(calibration_input_fn=config.calibration_input_fn)
        if config.optimize_offline:
            converter.build(input_fn=config.input_fn)
        tftrt_saved_model = converter

    else:
        converter.convert()
        if config.optimize_offline:
            converter.build(input_fn=config.input_fn)  # pylint: disable=no-member
        tftrt_saved_model = converter
    inputs = _get_inputs(meta_graph_def, signature_def_key, input_data_formats)
    outputs = _get_outputs(meta_graph_def, signature_def_key)

    return TfTRTSavedModel(tftrt_saved_model, inputs=inputs, outputs=outputs)
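
_get_trt_conversion_params is a project helper not shown here; a minimal sketch of what it might do, assuming it only overrides the precision mode on top of the TF-TRT defaults:

def _get_trt_conversion_params(precision_mode):
    # Hypothetical helper: start from the default params and set the requested precision.
    return trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=precision_mode)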
Example #27
 def _CreateConverterV2(
     self,
     input_saved_model_dir,
     input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
     precision_mode=trt_convert.TrtPrecisionMode.FP32):
   return trt_convert.TrtGraphConverterV2(
       input_saved_model_dir=input_saved_model_dir,
       input_saved_model_signature_key=input_saved_model_signature_key,
       conversion_params=trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
           max_workspace_size_bytes=10 << 20,  # Use a smaller workspace.
           precision_mode=precision_mode,
           is_dynamic_op=True,
           maximum_cached_engines=2))
Example #28
    def _convert(path_saved_model, path_frozen_model):
        """Convert to TensorRT model"""
        conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS
        conversion_params = conversion_params._replace(
            max_workspace_size_bytes=(1 << 32))
        conversion_params = conversion_params._replace(precision_mode="FP16")
        conversion_params = conversion_params._replace(
            maximum_cached_engines=100)

        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=path_saved_model,
            conversion_params=conversion_params)
        converter.convert()
        converter.save(path_frozen_model)
Example #29
 def _CreateConverterV2(
         self,
         input_saved_model_dir,
         input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
         max_workspace_size_bytes=10 << 20,  # Use a smaller workspace.
         precision_mode=trt_convert.TrtPrecisionMode.FP32,
         maximum_cached_engines=2,
         allow_build_at_runtime=True):
     return trt_convert.TrtGraphConverterV2(
         input_saved_model_dir=input_saved_model_dir,
         input_saved_model_signature_key=input_saved_model_signature_key,
         max_workspace_size_bytes=max_workspace_size_bytes,
         precision_mode=precision_mode,
         maximum_cached_engines=maximum_cached_engines,
         allow_build_at_runtime=allow_build_at_runtime)
Example #30
def run(**kwargs):
    input_model_dir = kwargs['input_model_dir']
    output_model_dir = kwargs['output_model_dir']
    run_benchmark = kwargs['run_benchmark']
    run_conversion = kwargs['run_conversion']

    if run_conversion:
        params = tf.experimental.tensorrt.ConversionParams(precision_mode='FP16')
        converter = trt.TrtGraphConverterV2(input_saved_model_dir=input_model_dir, conversion_params=params)
        converter.convert()
        converter.build(input_fn=data_feeder)
        converter.save(output_model_dir)

    if run_benchmark:
        benchmark_model(input_model_dir)
        benchmark_model(output_model_dir, custom_op=True)
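
data_feeder, the engine-build input function used above, is not shown; a minimal sketch, assuming a single 1x224x224x3 float32 input (the shape is an assumption):

import numpy as np

def data_feeder():
    # Hypothetical input_fn for converter.build(): yield one representative batch
    # matching the SavedModel's input signature.
    yield (np.zeros((1, 224, 224, 3), dtype=np.float32),)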