    def _GetTrtGraphDef(self, run_params, gdef):
        """Return trt converted graphdef."""
        params = self._GetParamsCached()
        conversion_params = self.GetConversionParams(run_params)
        logging.info(conversion_params)

        config_for_trt = config_pb2.ConfigProto(
            gpu_options=self._GetGPUOptions())
        if conversion_params.rewriter_config is not None:
            config_for_trt.graph_options.rewrite_options.CopyFrom(
                conversion_params.rewriter_config)
        return trt_convert.create_inference_graph(
            input_graph_def=gdef,
            outputs=params.input_names + params.output_names,
            max_batch_size=conversion_params.max_batch_size,
            max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
            precision_mode=conversion_params.precision_mode,
            minimum_segment_size=conversion_params.minimum_segment_size,
            is_dynamic_op=conversion_params.is_dynamic_op,
            maximum_cached_engines=conversion_params.maximum_cached_engines,
            cached_engine_batch_sizes=conversion_params.cached_engine_batch_sizes,
            use_calibration=conversion_params.use_calibration,
            session_config=config_for_trt)
 def _GetTrtGraphDef(self, params, gdef, precision_mode, is_dynamic_op):
   """Return trt converted graphdef."""
   return trt_convert.create_inference_graph(
       input_graph_def=gdef,
       outputs=[self.output_name],
       max_batch_size=max([dims[0] for dims in params.input_dims]),
       max_workspace_size_bytes=1 << 25,
       precision_mode=precision_mode,
       minimum_segment_size=2,
       is_dynamic_op=is_dynamic_op)
 def _GetTrtGraphDef(self, run_params, gdef):
   """Return trt converted graphdef."""
   params = self._GetParamsCached()
   trt_params = self.GetConversionParams(run_params)
   logging.info(trt_params)
   return trt_convert.create_inference_graph(
       input_graph_def=gdef,
       outputs=params.input_names + params.output_names,
       max_batch_size=trt_params.max_batch_size,
       max_workspace_size_bytes=trt_params.max_workspace_size_bytes,
       precision_mode=trt_params.precision_mode,
       minimum_segment_size=trt_params.minimum_segment_size,
       is_dynamic_op=trt_params.is_dynamic_op,
       maximum_cached_engines=trt_params.maximum_cached_engines,
       cached_engine_batch_sizes=trt_params.cached_engine_batch_sizes)
 def testCreateInferenceGraph_MinimumSegmentSize(self):
   if not trt_convert.is_tensorrt_enabled():
     return
   output_graph_def = trt_convert.create_inference_graph(
       self._GetGraphDef(), ["output"],
       minimum_segment_size=5,
       is_dynamic_op=False)
   node_name_to_op = {node.name: node.op for node in output_graph_def.node}
   self.assertEqual({
       "v1/read": "Const",
       "input": "Placeholder",
       "add": "Add",
       "mul": "Mul",
       "add_1": "Add",
       "output": "Identity"
   }, node_name_to_op)
  def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
    """Get the frozen mnist GraphDef.

    Args:
      use_trt: whether to use TF-TRT to convert the graph.
      max_batch_size: the max batch size to apply during TF-TRT conversion.
      model_dir: the model directory to load the checkpoints.

    Returns:
      The frozen mnist GraphDef.
    """
    graph = ops.Graph()
    with self.session(graph=graph) as sess:
      with graph.device('/GPU:0'):
        x = array_ops.placeholder(
            shape=(None, 28, 28, 1), dtype=dtypes.float32, name=INPUT_NODE_NAME)
        self._BuildGraph(x)
      # Load weights
      mnist_saver = saver.Saver()
      checkpoint_file = latest_checkpoint(model_dir)
      mnist_saver.restore(sess, checkpoint_file)
      # Freeze
      graph_def = graph_util.convert_variables_to_constants(
          sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
    # Convert with TF-TRT
    if use_trt:
      logging.info('Number of nodes before TF-TRT conversion: %d',
                   len(graph_def.node))
      graph_def = trt_convert.create_inference_graph(
          graph_def,
          outputs=[OUTPUT_NODE_NAME],
          max_batch_size=max_batch_size,
          precision_mode='INT8',
          # There is a 2GB GPU memory limit for each test, so we set
          # max_workspace_size_bytes to 256MB to leave enough room for the TF
          # runtime to allocate GPU memory.
          max_workspace_size_bytes=1 << 28,
          minimum_segment_size=2,
          use_calibration=False,
      )
      logging.info('Number of nodes after TF-TRT conversion: %d',
                   len(graph_def.node))
      num_engines = len(
          [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
      self.assertEqual(1, num_engines)
    return graph_def
  def _GetTrtGraphDef(self, run_params, graph_state, gdef):
    """Return trt converted graphdef."""
    params = self._GetParamsCached()
    conversion_params = self.GetConversionParams(run_params)
    logging.info(conversion_params)

    config_for_trt = self._GetConfigProto(run_params, graph_state)
    return trt_convert.create_inference_graph(
        input_graph_def=gdef,
        outputs=params.input_names + params.output_names,
        max_batch_size=conversion_params.max_batch_size,
        max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
        precision_mode=conversion_params.precision_mode,
        minimum_segment_size=conversion_params.minimum_segment_size,
        is_dynamic_op=conversion_params.is_dynamic_op,
        maximum_cached_engines=conversion_params.maximum_cached_engines,
        cached_engine_batches=conversion_params.cached_engine_batches,
        use_calibration=conversion_params.use_calibration,
        session_config=config_for_trt)
  def testCreateInferenceGraph_DynamicOp(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        is_dynamic_op=True,
        maximum_cached_engines=2,
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.test_session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1; a new engine is created and cached.
        self._TestRun(sess, 1, True)
        # Run with batch size 2; a new engine is created and cached.
        self._TestRun(sess, 2, True)
        # Run with batch size 3; since the number of cached engines has reached
        # the max, it should fall back to the TF function.
        self._TestRun(sess, 3, False)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.test_session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1; a new engine is created and cached.
        self._TestRun(sess, 1, True)
        # Run with batch size 2; a new engine is created and cached.
        self._TestRun(sess, 2, True)
        # Run with batch size 3; since the number of cached engines has reached
        # the max, it should fall back to the TF function.
        self._TestRun(sess, 3, False)
  def _GetTrtGraphDef(self, run_params, gdef):
    """Return trt converted graphdef."""
    params = self._GetParamsCached()
    conversion_params = self.GetConversionParams(run_params)
    logging.info(conversion_params)

    config_for_trt = config_pb2.ConfigProto(gpu_options=self._GetGPUOptions())
    if conversion_params.rewriter_config is not None:
      config_for_trt.graph_options.rewrite_options.CopyFrom(
          conversion_params.rewriter_config)
    return trt_convert.create_inference_graph(
        input_graph_def=gdef,
        outputs=params.input_names + params.output_names,
        max_batch_size=conversion_params.max_batch_size,
        max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
        precision_mode=conversion_params.precision_mode,
        minimum_segment_size=conversion_params.minimum_segment_size,
        is_dynamic_op=conversion_params.is_dynamic_op,
        maximum_cached_engines=conversion_params.maximum_cached_engines,
        cached_engine_batch_sizes=conversion_params.cached_engine_batch_sizes,
        session_config=config_for_trt)
  def testCreateInferenceGraph_StaticOp(self):
    if not trt_convert.is_tensorrt_enabled():
      return
    trt_convert.enable_test_value()

    tmp_dir = self.get_temp_dir()
    input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
    output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
    self._WriteInputSavedModel(input_saved_model_dir)
    output_graph_def = trt_convert.create_inference_graph(
        None,
        None,
        max_batch_size=1,
        is_dynamic_op=False,
        maximum_cached_engines=2,  # This is a no-op, added just for testing.
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.test_session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(sess, 2, False)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.test_session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1; the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1, True)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the TF function.
        self._TestRun(sess, 2, False)
  def _TestCreateInferenceGraph(self,
                                input_saved_model_dir=None,
                                output_saved_model_dir=None):
    """General method to test trt_convert.create_inference_graph()."""
    input_graph_def = None if input_saved_model_dir else self._GetGraphDef()
    output_graph_def = trt_convert.create_inference_graph(
        input_graph_def, ["output"],
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        session_config=self._GetConfigProto())
    graph_defs_to_verify = [output_graph_def]
    if output_saved_model_dir is not None:
      saved_model_graph_def = saved_model_utils.get_meta_graph_def(
          output_saved_model_dir, tag_constants.SERVING).graph_def
      self.assertTrue(isinstance(saved_model_graph_def, graph_pb2.GraphDef))
      graph_defs_to_verify.append(saved_model_graph_def)

    for graph_def in graph_defs_to_verify:
      node_name_to_op = {node.name: node.op for node in graph_def.node}
      self.assertEqual({
          "input": "Placeholder",
          "my_trt_op_0": "TRTEngineOp",
          "output": "Identity"
      }, node_name_to_op)
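
The examples above all drive the same entry point. As a compact reference, here is a minimal, self-contained sketch of the call pattern they exercise. It assumes a TF 1.x build with TensorRT support where trt_convert is importable from tensorflow.python.compiler.tensorrt (earlier releases exposed the same function via tensorflow.contrib.tensorrt); the file path, node names, and parameter values are hypothetical placeholders, not taken from the tests above.

# Minimal sketch (not from the tests above): convert a frozen GraphDef with
# TF-TRT. Assumes TF 1.x with TensorRT enabled; "frozen.pb" and the node
# names below are hypothetical placeholders.
from tensorflow.core.framework import graph_pb2
from tensorflow.python.compiler.tensorrt import trt_convert

graph_def = graph_pb2.GraphDef()
with open("frozen.pb", "rb") as f:  # hypothetical frozen-graph path
  graph_def.ParseFromString(f.read())

if trt_convert.is_tensorrt_enabled():
  converted_graph_def = trt_convert.create_inference_graph(
      input_graph_def=graph_def,
      outputs=["output"],                # fetch node names
      max_batch_size=1,                  # largest batch a static engine accepts
      max_workspace_size_bytes=1 << 28,  # TensorRT scratch space (256MB)
      precision_mode="FP16",             # "FP32", "FP16", or "INT8"
      minimum_segment_size=3,            # smallest subgraph converted to TRT
      is_dynamic_op=False)
  # To convert a SavedModel instead, pass None for the graph and outputs and
  # supply the SavedModel directories (hypothetical paths):
  # trt_convert.create_inference_graph(
  #     None, None,
  #     input_saved_model_dir="/path/to/in_dir",
  #     output_saved_model_dir="/path/to/out_dir")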