Example #1
def get_trt_converter(graph_def,
                      precision_mode,
                      output_node,
                      batch_size=128,
                      workspace_size=2 << 10):
    """ Create a TrtGraphConverter Object to use later

    Args:
      graph_def: GraphDef, the Frozen Graph to be converted.
      precision_mode: string, the precision that TensorRT should convert into.
        Options: FP32, FP16, or INT8.
      output_node: list of strings, the names of the output nodes that
        will be returned during inference.
      batch_size: int, the number of examples that will be predicted at a time.
      workspace_size: int, size in megabytes that can be used during conversion.

    Returns:
      TrtGraphConverter Object
    """
    return trt.TrtGraphConverter(
        input_graph_def=graph_def,
        nodes_blacklist=output_node,
        max_batch_size=batch_size,
        max_workspace_size_bytes=workspace_size << 20,  # MiB to bytes
        precision_mode=precision_mode)
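A minimal usage sketch for the helper above (the frozen-graph path 'frozen_model.pb' and the output node name 'logits' are hypothetical placeholders):

import tensorflow.compat.v1 as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

graph_def = tf.GraphDef()
with tf.gfile.GFile('frozen_model.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

converter = get_trt_converter(graph_def, 'FP16', ['logits'])
trt_graph = converter.convert()  # a TF-TRT optimized GraphDef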
    def _ConvertGraphV1(self,
                        output_saved_model_dir=None,
                        need_calibration=False,
                        max_batch_size=1,
                        minimum_segment_size=3,
                        is_dynamic_op=False,
                        maximum_cached_engines=1,
                        device=None):
        """Helper method to convert a GraphDef or SavedModel using TF-TRT."""
        input_saved_model_dir = None
        if output_saved_model_dir:
            input_saved_model_dir = self.mkdtemp()
            self._WriteInputSavedModelForV1(input_saved_model_dir, device)

        # Calibration requires dynamic_op.
        if need_calibration:
            is_dynamic_op = True

        # With dynamic ops, max_batch_size is unused and must be None.
        if is_dynamic_op:
            max_batch_size = None

        converter = trt_convert.TrtGraphConverter(
            input_saved_model_dir=input_saved_model_dir,
            input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY,
            input_graph_def=None
            if input_saved_model_dir else self._GetGraphDefForV1(device),
            nodes_denylist=None if input_saved_model_dir else ["output"],
            max_batch_size=max_batch_size,
            max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=(trt_convert.TrtPrecisionMode.INT8
                            if need_calibration else
                            trt_convert.TrtPrecisionMode.FP32),
            minimum_segment_size=minimum_segment_size,
            is_dynamic_op=is_dynamic_op,
            maximum_cached_engines=maximum_cached_engines)
        output_graph_def = converter.convert()

        if need_calibration:

            class CalibrationData(object):
                def __init__(self):
                    self._data = 0

                def next(self):
                    self._data += 1
                    return {
                        "input1:0": [[[self._data]]],
                        "input2:0": [[[self._data]]]
                    }

            output_graph_def = converter.calibrate(
                fetch_names=["output:0"],
                num_runs=10,
                feed_dict_fn=CalibrationData().next)

        if output_saved_model_dir is not None:
            converter.save(output_saved_model_dir=output_saved_model_dir)
        return output_graph_def
Example #3
    def __init__(self, frozen_path, gpu_mem_fraction=0.5):
        self.GPU_MEM_FRACTION = gpu_mem_fraction
        self.outputs = ['policy_head/Softmax', 'value_head/Tanh']
        with gfile.FastGFile(frozen_path, 'rb') as f:
            frozen_graph = tf.GraphDef()
            frozen_graph.ParseFromString(f.read())

        trt_converter = trt.TrtGraphConverter(input_graph_def=frozen_graph,
                                              nodes_blacklist=self.outputs,
                                              is_dynamic_op=True,
                                              precision_mode='INT8')
        trt_graph = trt_converter.convert()
        # TODO: trt_converter.calibrate

        tf.reset_default_graph()
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.import_graph_def(trt_graph, name='')

        self.sess = tf.Session(graph=self.graph, config=self._get_gpu_config())

        self.state = self.sess.graph.get_tensor_by_name('board_state_input:0')
        self.policy = self.sess.graph.get_tensor_by_name(
            'policy_head/Softmax:0')
        self.value = self.sess.graph.get_tensor_by_name('value_head/Tanh:0')
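Assuming the class above is instantiated as net (the class name is not shown in the snippet, and the board-state shape is model specific), inference would look like:

policy, value = net.sess.run([net.policy, net.value],
                             feed_dict={net.state: board_state})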
Example #4
def convert2trt(tf_savedmodel_dir: str, trt_savedmodel_dir: str):
    converter = trt.TrtGraphConverter(input_saved_model_dir=tf_savedmodel_dir,
                                      max_workspace_size_bytes=(2 << 20),
                                      precision_mode='FP16',
                                      maximum_cached_engines=1)
    converter.convert()
    converter.save(trt_savedmodel_dir)
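A call sketch with placeholder directories; note that 2 << 20 bytes is only 2 MiB of workspace, which is small for most real models:

convert2trt('/models/resnet50_savedmodel', '/models/resnet50_trt_savedmodel')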
def get_frozen_tftrt_model(bert_config, shape, num_labels,
                           use_one_hot_embeddings, init_checkpoint):
    tf_config = tf.ConfigProto()
    output_node_names = [
        'loss/cls_loss', 'loss/cls_per_example_loss', 'loss/cls_logits',
        'loss/cls_probabilities'
    ]

    with tf.Session(config=tf_config) as tf_sess:
        input_ids = tf.placeholder(tf.int32, shape, 'input_ids')
        input_mask = tf.placeholder(tf.int32, shape, 'input_mask')
        segment_ids = tf.placeholder(tf.int32, shape, 'segment_ids')
        label_ids = tf.placeholder(tf.int32, (None,), 'label_ids')

        create_model(bert_config, False, input_ids, input_mask, segment_ids,
                     label_ids, num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        tf_sess.run(tf.global_variables_initializer())
        print("LOADED!")
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name,
                            var.shape, init_string)

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            tf_sess, tf_sess.graph.as_graph_def(), output_node_names)

        num_nodes = len(frozen_graph.node)
        print('Converting graph using TensorFlow-TensorRT...')
        from tensorflow.python.compiler.tensorrt import trt_convert as trt
        converter = trt.TrtGraphConverter(
            input_graph_def=frozen_graph,
            nodes_blacklist=output_node_names,
            max_workspace_size_bytes=(4096 << 20) - 1000,
            precision_mode="FP16" if FLAGS.use_fp16 else "FP32",
            minimum_segment_size=4,
            is_dynamic_op=True,
            maximum_cached_engines=1000)
        frozen_graph = converter.convert()

        print('Total node count before and after TF-TRT conversion:',
              num_nodes, '->', len(frozen_graph.node))
        print(
            'TRT node count:',
            len([1 for n in frozen_graph.node if str(n.op) == 'TRTEngineOp']))

        with tf.gfile.GFile("frozen_modelTRT.pb", "wb") as f:
            f.write(frozen_graph.SerializeToString())

    return frozen_graph
Example #6
    def convert(self, model: Model, dataloader_fn) -> Model:
        # https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html
        # converting graph_def is not supported in TF2
        from tensorflow.python.compiler.tensorrt import trt_convert  # pytype: disable=import-error

        assert isinstance(model.handle, tf.compat.v1.GraphDef)

        session_config = create_session_config(allow_growth=True)
        output_node_names = [
            spec.name.split(":")[0] for spec in model.outputs.values()
        ]

        converter = trt_convert.TrtGraphConverter(
            input_graph_def=model.handle,
            session_config=session_config,
            nodes_blacklist=output_node_names,
            is_dynamic_op=self._is_dynamic_op,
            precision_mode=self._precision.value,
            max_workspace_size_bytes=self._max_workspace_size,
            maximum_cached_engines=self._maximum_cached_engines,
            max_batch_size=self._max_batch_size,
            minimum_segment_size=self._minimum_segment_size,
        )
        graph_def = converter.convert()

        return model._replace(handle=graph_def)
Example #7
def save_tftrt():
    converter = trt.TrtGraphConverter(
        input_saved_model_dir=input_saved_model_dir,
        max_workspace_size_bytes=(1 << 32),
        precision_mode='FP16',
        maximum_cached_engines=100)
    converter.convert()
    converter.save(output_saved_model_dir)
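Here input_saved_model_dir and output_saved_model_dir are assumed to be module-level variables defined elsewhere, for example:

input_saved_model_dir = '/models/my_savedmodel'      # hypothetical paths
output_saved_model_dir = '/models/my_savedmodel_trt'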
def trt_frozen_graph_and_tensors(model_name,
                                 frozen_graph_filepath=FROZEN_GRAPH_FILEPATH,
                                 precision_mode='FP16'):
    """Loads a TensorFlow frozen graph and changes its precision mode.

    Either 'FP32' or 'FP16' may be used: FP32 keeps the original
    precision while still applying the TensorRT optimization.

    Args:
        model_name (str): The name of your model, e.g. resnet_manual_highres_center_only_f1_2_f2_4
        frozen_graph_filepath (str): Path to where the frozen graph was saved
        precision_mode (str): either 'FP32' or 'FP16'
    Returns:
        (tuple): tuple containing:
            frozen_graph (tf.Graph): Graph containing the TRT graph
            x (tf.Tensor): Tensor containing the x data
            y (tf.Tensor): Tensor containing the y data
    """
    from tensorflow.python.compiler.tensorrt import trt_convert as trt

    if precision_mode in ['FP32', 'FP16']:
        print('OPENING FROZEN GRAPH FOR MODEL {}'.format(model_name))
        with open(frozen_graph_filepath, 'rb') as f:
            frozen_graph_gd = tf.compat.v1.GraphDef()
            frozen_graph_gd.ParseFromString(f.read())

            if precision_mode == 'FP16':
                converter = trt.TrtGraphConverter(input_graph_def=frozen_graph_gd, 
                                                  nodes_blacklist=['local_dense/truediv'],
                                                  precision_mode=precision_mode, 
                                                  use_calibration=True, 
                                                  is_dynamic_op=True)
                del frozen_graph_gd
                print('Converting to {}'.format(precision_mode))
                frozen_graph = converter.convert()
                print('Conversion finished')
                
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth=True
        with tf.compat.v1.Session(graph=tf.Graph(), config=config) as sess:
            if precision_mode == 'FP32':
                frozen_graph = frozen_graph_gd
            tf.import_graph_def(frozen_graph)

            input_node = 'import/input1_1'
            output_node = 'import/local_dense/truediv'

            frozen_graph = sess.graph
            x = frozen_graph.get_tensor_by_name(input_node + ':0')
            y = frozen_graph.get_tensor_by_name(output_node + ':0')
            return frozen_graph, x, y
    else:
        raise ValueError(
            "precision_mode must be 'FP32' or 'FP16', got {!r}".format(
                precision_mode))
Example #9
    def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
        """Get the frozen mnist GraphDef.

    Args:
      use_trt: whether use TF-TRT to convert the graph.
      max_batch_size: the max batch size to apply during TF-TRT conversion.
      model_dir: the model directory to load the checkpoints.

    Returns:
      The frozen mnist GraphDef.
    """
        graph = ops.Graph()
        with self.session(graph=graph) as sess:
            with graph.device('/GPU:0'):
                x = array_ops.placeholder(shape=(None, 28, 28, 1),
                                          dtype=dtypes.float32,
                                          name=INPUT_NODE_NAME)
                self._BuildGraph(x)
            # Load weights
            mnist_saver = saver.Saver()
            checkpoint_file = latest_checkpoint(model_dir)
            if checkpoint_file is None:
                raise ValueError(
                    'latest_checkpoint returned None. Check if '
                    'model_dir={} is the right directory'.format(model_dir))
            mnist_saver.restore(sess, checkpoint_file)
            # Freeze
            graph_def = graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
        # Convert with TF-TRT
        if use_trt:
            logging.info('Number of nodes before TF-TRT conversion: %d',
                         len(graph_def.node))
            converter = trt_convert.TrtGraphConverter(
                input_graph_def=graph_def,
                nodes_blacklist=[OUTPUT_NODE_NAME],
                max_batch_size=max_batch_size,
                precision_mode='INT8',
                # There is a 2GB GPU memory limit for each test, so we set
                # max_workspace_size_bytes to 256MB to leave enough room for TF
                # runtime to allocate GPU memory.
                max_workspace_size_bytes=1 << 28,
                minimum_segment_size=2,
                use_calibration=False,
                use_function_backup=False)
            graph_def = converter.convert()
            logging.info('Number of nodes after TF-TRT conversion: %d',
                         len(graph_def.node))
            num_engines = len(
                [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
            self.assertEqual(1, num_engines)
        return graph_def
Example #10
    def _ConvertGraph(self,
                      input_saved_model_dir=None,
                      output_saved_model_dir=None,
                      need_calibration=False,
                      max_batch_size=1,
                      minimum_segment_size=3,
                      is_dynamic_op=False,
                      maximum_cached_engines=1,
                      use_function_backup=False):
        """Helper method to convert a GraphDef or SavedModel using TF-TRT."""
        converter = trt_convert.TrtGraphConverter(
            input_saved_model_dir=input_saved_model_dir,
            input_saved_model_signature_key="mypredict",
            input_graph_def=None
            if input_saved_model_dir else self._GetGraphDef(),
            nodes_blacklist=["output"],
            session_config=self._GetConfigProto(),
            max_batch_size=max_batch_size,
            max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
            precision_mode=(trt_convert.TrtPrecisionMode.INT8
                            if need_calibration else
                            trt_convert.TrtPrecisionMode.FP32),
            minimum_segment_size=minimum_segment_size,
            is_dynamic_op=is_dynamic_op,
            maximum_cached_engines=maximum_cached_engines,
            use_function_backup=use_function_backup)
        conversion_result = converter.convert()

        if context.executing_eagerly():
            output_graph_def = conversion_result.graph.as_graph_def()
        else:
            output_graph_def = conversion_result

            if need_calibration:

                class CalibrationData(object):
                    def __init__(self):
                        self._data = 0

                    def next(self):
                        self._data += 1
                        return {"input:0": [[[self._data]]]}

                output_graph_def = converter.calibrate(
                    fetch_names=["output:0"],
                    num_runs=10,
                    feed_dict_fn=CalibrationData().next)

        if output_saved_model_dir is not None:
            converter.save(output_saved_model_dir=output_saved_model_dir)
        return output_graph_def
def optim_graph(graph, blacklist_names, precision_mode, mss, mce):
    '''Returns the TRT-converted graph.

    mss is the minimum segment size and mce the maximum number of
    cached engines forwarded to TrtGraphConverter.
    '''
    with tf.compat.v1.Session() as sess:
        converter = trt_convert.TrtGraphConverter(
            input_graph_def=graph,
            nodes_blacklist=blacklist_names,
            precision_mode=precision_mode,
            max_batch_size=1,
            max_workspace_size_bytes=int(5e8),
            minimum_segment_size=mss,
            maximum_cached_engines=mce,
            use_calibration=False)
        new_g = converter.convert()
    return new_g
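A hedged call sketch (frozen_graph_def and the node name are placeholders for a GraphDef loaded elsewhere):

new_graph = optim_graph(frozen_graph_def, ['logits'], 'FP16', mss=3, mce=1)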
 def _CreateConverter(self, saved_model_dir, session_config,
                      conversion_params):
     """Return a TrtGraphConverter."""
     converter = trt_convert.TrtGraphConverter(
         input_saved_model_dir=saved_model_dir,
         session_config=session_config,
         max_batch_size=conversion_params.max_batch_size,
         max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
         precision_mode=conversion_params.precision_mode,
         minimum_segment_size=conversion_params.minimum_segment_size,
         is_dynamic_op=conversion_params.is_dynamic_op,
         maximum_cached_engines=conversion_params.maximum_cached_engines,
         use_calibration=conversion_params.use_calibration,
         use_function_backup=conversion_params.use_function_backup)
     return converter
Example #13
 def _create_converter(self, trt_convert_params: trt.TrtConversionParams):
     conversion_nodes_denylist = self.output_tensor_names
     return trt.TrtGraphConverter(
         input_saved_model_dir=self._saved_model_dir,
         input_saved_model_tags=self._saved_model_tags,
         input_saved_model_signature_key=self._saved_model_signature_key,
         nodes_denylist=conversion_nodes_denylist,
         max_batch_size=trt_convert_params.max_batch_size,
         max_workspace_size_bytes=trt_convert_params.max_workspace_size_bytes,
         precision_mode=trt_convert_params.precision_mode,
         minimum_segment_size=trt_convert_params.minimum_segment_size,
         is_dynamic_op=trt_convert_params.is_dynamic_op,
         maximum_cached_engines=trt_convert_params.maximum_cached_engines,
         use_calibration=trt_convert_params.use_calibration,
     )
Example #14
def optimizeGraph(graph_def, output_nodes, user_trt_args=None):
    try:
        from tensorflow.python.compiler.tensorrt import trt_convert as trt
        tensor_rt_args = {'input_graph_def': graph_def,
                          'nodes_blacklist': output_nodes,
                          'precision_mode': trt.TrtPrecisionMode.FP16,
                          'is_dynamic_op': True,
                          'maximum_cached_engines': 10,
                          'minimum_segment_size': 6,
                          'max_batch_size': 4}
        if user_trt_args:
            tensor_rt_args.update(user_trt_args)
        converter = trt.TrtGraphConverter(**tensor_rt_args)
        return converter.convert()
    except Exception:
        print("WARNING: Unable to optimize graph.")
        return graph_def
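Because the keyword arguments are collected in a dict, user_trt_args can override any default; a sketch (graph_def and the node name are placeholders):

from tensorflow.python.compiler.tensorrt import trt_convert as trt

trt_graph = optimizeGraph(graph_def, ['detection_boxes'],
                          user_trt_args={'precision_mode': trt.TrtPrecisionMode.FP32,
                                         'max_batch_size': 1})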
 def _CreateConverter(self, gdef, session_config, conversion_params):
     """Return a TrtGraphConverter."""
     params = self._GetParamsCached()
     converter = trt_convert.TrtGraphConverter(
         input_graph_def=gdef,
         nodes_blacklist=params.input_names + params.output_names,
         session_config=session_config,
         max_batch_size=conversion_params.max_batch_size,
         max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
         precision_mode=conversion_params.precision_mode,
         minimum_segment_size=conversion_params.minimum_segment_size,
         is_dynamic_op=conversion_params.is_dynamic_op,
         maximum_cached_engines=conversion_params.maximum_cached_engines,
         cached_engine_batches=conversion_params.cached_engine_batches,
         use_calibration=conversion_params.use_calibration)
     return converter
Example #16
def GenerateModelV1(tf_saved_model_dir, tftrt_saved_model_dir):
    """Generate and convert a model using TFv1 API."""
    def SimpleModel():
        """Define model with a TF graph."""
        def GraphFn():
            input1 = array_ops.placeholder(dtype=dtypes.float32,
                                           shape=[None, 1, 1],
                                           name="input1")
            input2 = array_ops.placeholder(dtype=dtypes.float32,
                                           shape=[None, 1, 1],
                                           name="input2")
            var = variables.Variable([[[1.0]]],
                                     dtype=dtypes.float32,
                                     name="v1")
            out = GetGraph(input1, input2, var)
            return g, var, input1, input2, out

        g = ops.Graph()
        with g.as_default():
            return GraphFn()

    g, var, input1, input2, out = SimpleModel()
    signature_def = signature_def_utils.build_signature_def(
        inputs={
            "input1": utils.build_tensor_info(input1),
            "input2": utils.build_tensor_info(input2)
        },
        outputs={"output": utils.build_tensor_info(out)},
        method_name=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
    saved_model_builder = builder.SavedModelBuilder(tf_saved_model_dir)
    with Session(graph=g) as sess:
        sess.run(var.initializer)
        saved_model_builder.add_meta_graph_and_variables(
            sess, [tag_constants.SERVING],
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature_def
            })
    saved_model_builder.save()

    # Convert TF model to TensorRT
    converter = trt_convert.TrtGraphConverter(
        input_saved_model_dir=tf_saved_model_dir, is_dynamic_op=True)
    converter.convert()
    converter.save(tftrt_saved_model_dir)
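To sanity-check the converted model, it can be reloaded with the TF1 SavedModel loader and the tensors resolved through the signature built above (a sketch under those assumptions):

from tensorflow.python.saved_model import loader

with Session(graph=ops.Graph()) as sess:
    meta_graph = loader.load(sess, [tag_constants.SERVING], tftrt_saved_model_dir)
    sig = meta_graph.signature_def[
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    result = sess.run(sig.outputs['output'].name,
                      feed_dict={sig.inputs['input1'].name: [[[1.0]]],
                                 sig.inputs['input2'].name: [[[2.0]]]})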
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', help='path to pb file.', required=True)
    parser.add_argument('--output', help='Output dir.', default='model')
    args = parser.parse_args()

    model_dir = args.output
    if not tf.gfile.Exists(model_dir):
        tf.gfile.MkDir(model_dir)

    if not tf.gfile.Exists(args.path):
        print('Error: pb file does not exist!')
        return

    tf.reset_default_graph()
    graph = tf.Graph()

    graph_def = None
    with tf.gfile.GFile(args.path, 'rb') as f:
        graph_def = tf.GraphDef.FromString(f.read())

    output_names = ['SemanticPredictions']

    converter = trt_convert.TrtGraphConverter(
        input_graph_def=graph_def,
        nodes_blacklist=output_names,  #output nodes
        max_batch_size=1,
        # is_dynamic_op=False,
        is_dynamic_op=True,
        max_workspace_size_bytes=1 << 25,
        precision_mode=trt_convert.TrtPrecisionMode.FP16,
        minimum_segment_size=50)
    trt_graph = converter.convert()

    trt_engine_opts = len(
        [1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])
    print("trt_engine_opts = {}".format(trt_engine_opts))

    base_name = os.path.splitext(os.path.basename(args.path))[0]
    save_model_file_name = base_name + '_dynamic_fp16.pb'
    with open(os.path.join(model_dir, save_model_file_name), 'wb') as f:
        f.write(trt_graph.SerializeToString())
def to_tf_trt(savedmodel_dir: str,
              output_dir: str,
              precision: str,
              feed_dict_fn: Callable,
              num_runs: int,
              output_tensor_names: List[str],
              compress: bool):
    """
    Export Tensorflow savedModel to TF-TRT

    :param savedmodel_dir: (str) Input directory containing a Tensorflow savedModel
    :param output_dir: (str) Output directory for storage of the generated TF-TRT exported model
    :param precision: (str) Desired precision of the network (FP32, FP16 or INT8)
    :param feed_dict_fn: (Callable) Input tensors for INT8 calibration. Model specific.
    :param num_runs: (int) Number of calibration runs.
    :param output_tensor_names: (List) Name of the output tensor for graph conversion. Model specific.
    :param compress: (bool) Compress output
    """
    if savedmodel_dir is None or not os.path.exists(savedmodel_dir):
        raise FileNotFoundError('savedmodel_dir not found: {}'.format(savedmodel_dir))

    if os.path.exists(output_dir):
        print("[*] Output dir '{}' already exists. Cleaning up ...".format(output_dir))
        shutil.rmtree(output_dir)

    print('[*] Converting model...')

    converter = trt.TrtGraphConverter(input_saved_model_dir=savedmodel_dir,
                                      precision_mode=precision)
    converter.convert()

    if precision == 'INT8':
        print('[*] Running INT8 calibration ...')

        converter.calibrate(fetch_names=output_tensor_names, num_runs=num_runs, feed_dict_fn=feed_dict_fn)

    converter.save(output_dir)

    print('[*] Done! TF-TRT saved_model stored in: `%s`' % output_dir)

    if compress:
        _compress('tftrt_saved_model', output_dir)
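For INT8, feed_dict_fn must return one feed dict per calibration run; a minimal sketch with synthetic data (the tensor name 'input:0', the shape, and all paths are model-specific assumptions):

import numpy as np

def synthetic_feed_dict_fn():
    # One batch of random calibration data; replace with real samples.
    return {'input:0': np.random.random_sample((8, 224, 224, 3)).astype(np.float32)}

to_tf_trt(savedmodel_dir='/models/tf_savedmodel',
          output_dir='/models/trt_savedmodel',
          precision='INT8',
          feed_dict_fn=synthetic_feed_dict_fn,
          num_runs=10,
          output_tensor_names=['logits:0'],
          compress=False)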
Example #19
def optimizeGraph(graph_def, output_nodes, user_trt_args=None):
    if not tf.test.is_gpu_available(cuda_only=True) or os.getenv("OPENEM_NOTRT") == "1":
        print("No GPU available to optimize for")
        return graph_def
    try:
        from tensorflow.python.compiler.tensorrt import trt_convert as trt
        tensor_rt_args = {'input_graph_def': graph_def,
                          'nodes_blacklist': output_nodes,
                          'precision_mode': trt.TrtPrecisionMode.FP16,
                          'is_dynamic_op': True,
                          'maximum_cached_engines': 10,
                          'minimum_segment_size': 6,
                          'max_batch_size': 4}
        if user_trt_args:
            tensor_rt_args.update(user_trt_args)
        converter = trt.TrtGraphConverter(**tensor_rt_args)
        return converter.convert()
    except Exception:
        print("WARNING: Unable to optimize graph.")
        return graph_def
Example #20
  def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
    """Gets the frozen mnist GraphDef.

    Args:
      use_trt: whether use TF-TRT to convert the graph.
      max_batch_size: the max batch size to apply during TF-TRT conversion.
      model_dir: the model directory to load the checkpoints.

    Returns:
      The frozen mnist GraphDef.
    """
    graph = ops.Graph()
    with self.session(graph=graph) as sess:
      with graph.device('/GPU:0'):
        x = array_ops.placeholder(
            shape=(None, 28, 28, 1), dtype=dtypes.float32, name=INPUT_NODE_NAME)
        self._BuildGraph(x)
      self._LoadWeights(model_dir, sess)
      # Freeze
      graph_def = graph_util.convert_variables_to_constants(
          sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
    # Convert with TF-TRT
    if use_trt:
      logging.info('Number of nodes before TF-TRT conversion: %d',
                   len(graph_def.node))
      converter = trt_convert.TrtGraphConverter(
          input_graph_def=graph_def,
          nodes_denylist=[OUTPUT_NODE_NAME],
          max_batch_size=max_batch_size,
          precision_mode='INT8',
          max_workspace_size_bytes=(
              trt_convert.DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES),
          minimum_segment_size=2,
          use_calibration=False)
      graph_def = converter.convert()
      logging.info('Number of nodes after TF-TRT conversion: %d',
                   len(graph_def.node))
      num_engines = len(
          [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
      self.assertEqual(1, num_engines)
    return graph_def
Example #21
def get_trt_graph_from_calib_cxg(graph_name, graph_def, data, input_node,
                                 output_node, output_dir):
    """Convert a TensorRT graph used for calibration to an inference graph."""
    converter = trt.TrtGraphConverter(
        input_graph_def=graph_def,
        nodes_blacklist=output_node,
        max_batch_size=4,
        # max_workspace_size_bytes=workspace_size<<20,
        precision_mode='INT8')
    converter.convert()

    def input_fn():
        iterator = get_iterator(data)

        return {input_node: iterator.get_next()}

    trt_graph = converter.calibrate(fetch_names=output_node,
                                    num_runs=1,
                                    input_map_fn=input_fn)
    write_graph_to_file(graph_name, trt_graph, output_dir)
    return trt_graph
def freeze_graph(model_path,
                 use_trt=False,
                 trt_max_batch_size=8,
                 trt_precision='fp32'):
    output_names = ['policy_output', 'value_output']

    n = DualNetwork(model_path)
    out_graph = tf.graph_util.convert_variables_to_constants(
        n.sess, n.sess.graph.as_graph_def(), output_names)

    # Eval is always fp32, so store an eval copy before converting to TRT.
    metadata = make_model_metadata({
        'engine': 'tf',
        'use_trt': False,
    })
    minigo_model.write_graph_def(out_graph, metadata,
                                 model_path + '.evalfp32minigo')

    if use_trt:
        from tensorflow.python.compiler.tensorrt import trt_convert as trt
        converter = trt.TrtGraphConverter(input_graph_def=out_graph,
                                          nodes_blacklist=output_names,
                                          max_batch_size=trt_max_batch_size,
                                          max_workspace_size_bytes=1 << 29,
                                          precision_mode=trt_precision)
        out_graph = converter.convert()

    metadata = make_model_metadata({
        'engine': 'tf',
        'use_trt': bool(use_trt),
    })

    # double buffer model write
    minigo_model.write_graph_def(out_graph, metadata,
                                 model_path + '.stagedmodel')
    minigo_model.write_graph_def(out_graph, metadata, model_path + '.minigo')
  def _CreateConverter(self, run_params, saved_model_dir, conversion_params):
    """Returns a TrtGraphConverter."""
    if run_params.is_v2:
      converter_v2 = trt_convert.TrtGraphConverterV2(
          input_saved_model_dir=saved_model_dir,
          use_dynamic_shape=run_params.dynamic_shape,
          dynamic_shape_profile_strategy=self._profile_strategy,
          **conversion_params._asdict())
      if self._disable_non_trt_optimizers:
        converter_v2._test_only_disable_non_trt_optimizers = True  # pylint: disable=protected-access
      return converter_v2

    converter_v1 = trt_convert.TrtGraphConverter(
        input_saved_model_dir=saved_model_dir,
        max_batch_size=self.GetMaxBatchSize(run_params),
        max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
        precision_mode=conversion_params.precision_mode,
        minimum_segment_size=conversion_params.minimum_segment_size,
        is_dynamic_op=run_params.dynamic_engine,
        maximum_cached_engines=conversion_params.maximum_cached_engines,
        use_calibration=conversion_params.use_calibration)
    if self._disable_non_trt_optimizers:
      converter_v1._test_only_disable_non_trt_optimizers = True  # pylint: disable=protected-access
    return converter_v1
def load_graph_and_convert(model_name, frozen_graph_filepath, precision_mode, seed, f1, f2):
    """ Load a full-precision (FP32) frozen graph, and return it as a half-precision (FP16) frozen graph
    @params:
        model_name (string): The "base" name of your model, such as "resnet", so it can be found in the directory
        frozen_graph_filepath (string): The path to the directory containing your frozen graph
        precision_mode (string): The precision you are converting your model, here we only use 'FP16' so far
    @return:
        The original frozen_graph converted into a different precision mode 
        
    """
    print('OPENING FROZEN GRAPH FOR MODEL {}.'.format(model_name))
 
    frozen_graph_filepath = frozen_graph_filepath + '{}_{}/flex_random_seed_{}_'.format(f1,f2,seed) + model_name + '_frozen_graph.pb'
    with open(frozen_graph_filepath, 'rb') as f:
        frozen_graph_gd = tf.GraphDef()
        frozen_graph_gd.ParseFromString(f.read())

    print('BEGINNING THE CONVERSION TO TRT {}'.format(precision_mode))
    converter = trt.TrtGraphConverter(input_graph_def=frozen_graph_gd,
                                      nodes_blacklist=['local_dense/truediv'], 
                                      precision_mode=precision_mode, 
                                      use_calibration=True,
                                      is_dynamic_op=True)

    try:
        frozen_graph = converter.convert()
        print('CONVERSION FINISHED WITH SUCCESS.')
    except Exception:
        error_message = 'Exception caught on file: '
        error_message += os.path.abspath(sys.argv[0]) + '\n'
        traceback_message = traceback.format_exc()
        with open('./Logs/exceptions.txt', 'w') as text_file:
            text_file.write(error_message + traceback_message + '\n')
        print(error_message)
        raise
    return frozen_graph
Example #25
def get_frozen_graph(
    model,
    model_dir=None,
    use_trt=False,
    engine_dir=None,
    use_dynamic_op=False,
    precision='FP32',
    batch_size=8,
    minimum_segment_size=2,
    calib_files=None,
    num_calib_inputs=None,
    use_synthetic=False,
    cache=False,
    default_models_dir='./data',
    max_workspace_size=(1<<32)):
    """Retreives a frozen GraphDef from model definitions in classification.py and applies TF-TRT

    model: str, the model name (see NETS table in classification.py)
    use_trt: bool, if true, use TensorRT
    precision: str, floating point precision (FP32, FP16, or INT8)
    batch_size: int, batch size for TensorRT optimizations
    returns: tensorflow.GraphDef, the TensorRT compatible frozen graph
    """
    num_nodes = {}
    times = {}
    graph_sizes = {}

    # Load from pb file if frozen graph was already created and cached
    if cache:
        # Graph must match the model, TRT mode, precision, and batch size
        prebuilt_graph_path = "graphs/frozen_graph_%s_%d_%s_%d.pb" % (model, int(use_trt), precision, batch_size)
        if os.path.isfile(prebuilt_graph_path):
            print('Loading cached frozen graph from \'%s\'' % prebuilt_graph_path)
            start_time = time.time()
            with tf.gfile.GFile(prebuilt_graph_path, "rb") as f:
                frozen_graph = tf.GraphDef()
                frozen_graph.ParseFromString(f.read())
            times['loading_frozen_graph'] = time.time() - start_time
            num_nodes['loaded_frozen_graph'] = len(frozen_graph.node)
            num_nodes['trt_only'] = len([1 for n in frozen_graph.node if str(n.op)=='TRTEngineOp'])
            graph_sizes['loaded_frozen_graph'] = len(frozen_graph.SerializeToString())
            return frozen_graph, num_nodes, times, graph_sizes

    # Build graph and load weights
    frozen_graph = build_classification_graph(model, model_dir, default_models_dir)
    num_nodes['native_tf'] = len(frozen_graph.node)
    graph_sizes['native_tf'] = len(frozen_graph.SerializeToString())

    # Convert to TensorRT graph
    if use_trt:
        start_time = time.time()
        converter = trt.TrtGraphConverter(
            input_graph_def=frozen_graph,
            nodes_blacklist=['logits', 'classes'],
            max_batch_size=batch_size,
            max_workspace_size_bytes=max_workspace_size,
            precision_mode=precision.upper(),
            minimum_segment_size=minimum_segment_size,
            is_dynamic_op=use_dynamic_op
        )
        frozen_graph = converter.convert()
        times['trt_conversion'] = time.time() - start_time
        num_nodes['tftrt_total'] = len(frozen_graph.node)
        num_nodes['trt_only'] = len([1 for n in frozen_graph.node if str(n.op)=='TRTEngineOp'])
        graph_sizes['trt'] = len(frozen_graph.SerializeToString())

        if engine_dir:
            segment_number = 0
            for node in frozen_graph.node:
                if node.op == "TRTEngineOp":
                    engine = node.attr["serialized_segment"].s
                    engine_path = engine_dir+'/{}_{}_{}_segment{}.trtengine'.format(model, precision, batch_size, segment_number)
                    segment_number += 1
                    with open(engine_path, "wb") as f:
                        f.write(engine)

        if precision == 'INT8':
            calib_graph = frozen_graph
            graph_sizes['calib'] = len(calib_graph.SerializeToString())

            def input_map_fn():
                features, _ = input_fn(model, calib_files, batch_size, use_synthetic)
                return {'input:0': features}

            # INT8 calibration step
            print('Calibrating INT8...')
            start_time = time.time()
            frozen_graph = converter.calibrate(
                fetch_names=['logits', 'classes'],
                num_runs=num_calib_inputs // batch_size,
                input_map_fn=input_map_fn)
            times['trt_calibration'] = time.time() - start_time

            # Overwrite the earlier 'trt' entry so it reflects the calibrated graph size
            graph_sizes['trt'] = len(frozen_graph.SerializeToString())

            del calib_graph
            print('INT8 graph created.')

    # Cache graph to avoid long conversions each time
    if cache:
        if not os.path.exists(os.path.dirname(prebuilt_graph_path)):
            os.makedirs(os.path.dirname(prebuilt_graph_path))
        start_time = time.time()
        with tf.gfile.GFile(prebuilt_graph_path, "wb") as f:
            f.write(frozen_graph.SerializeToString())
        times['saving_frozen_graph'] = time.time() - start_time

    return frozen_graph, num_nodes, times, graph_sizes
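A hedged call sketch (the model name must match an entry in classification.py's NETS table; paths and values are placeholders):

frozen_graph, num_nodes, times, graph_sizes = get_frozen_graph(
    model='resnet_v1_50',
    model_dir='./checkpoints/resnet_v1_50',
    use_trt=True,
    precision='FP16',
    batch_size=8)
print('TRT engine ops:', num_nodes.get('trt_only', 0))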
Example #26
def MakeExtractor(sess, config, import_scope=None):
    """Creates a function to extract features from an image.

  Args:
    sess: TensorFlow session to use.
    config: DelfConfig proto containing the model configuration.
    import_scope: Optional scope to use for model.

  Returns:
    Function that receives an image and returns features.
  """
    # The SavedModel loader call that this frozen-graph path replaces:
    # tf.saved_model.loader.load(
    #     sess, [tf.saved_model.tag_constants.SERVING],
    #     config.model_path,
    #     import_scope=import_scope)
    with tf.gfile.GFile('./static_model/delf_both_white/frozen_graph.pb',
                        'rb') as f:
        frozen_graph = tf.GraphDef()
        frozen_graph.ParseFromString(f.read())
    # Now you can create a TensorRT inference graph from your
    # frozen graph:
    converter = trt.TrtGraphConverter(input_graph_def=frozen_graph,
                                      nodes_blacklist=[
                                          'global_feature', 'boxes',
                                          'features', 'scales', 'scores'
                                      ],
                                      precision_mode="FP32",
                                      maximum_cached_engines=100,
                                      is_dynamic_op=True)  #output nodes
    trt_graph = converter.convert()
    # Import the TensorRT graph into a new graph and run:
    output_node = tf.import_graph_def(trt_graph,
                                      return_elements=[
                                          'global_feature', 'boxes',
                                          'features', 'scales', 'scores'
                                      ])

    import_scope_prefix = import_scope + '/' if import_scope is not None else ''
    input_image = sess.graph.get_tensor_by_name('%sinput_image:0' %
                                                import_scope_prefix)
    input_score_threshold = sess.graph.get_tensor_by_name(
        '%sinput_abs_thres:0' % import_scope_prefix)
    input_image_scales = sess.graph.get_tensor_by_name('%sinput_scales:0' %
                                                       import_scope_prefix)
    input_max_feature_num = sess.graph.get_tensor_by_name(
        '%sinput_max_feature_num:0' % import_scope_prefix)
    global_feature = sess.graph.get_tensor_by_name('%sglobal_feature:0' %
                                                   import_scope_prefix)
    boxes = sess.graph.get_tensor_by_name('%sboxes:0' % import_scope_prefix)
    raw_descriptors = sess.graph.get_tensor_by_name('%sfeatures:0' %
                                                    import_scope_prefix)
    feature_scales = sess.graph.get_tensor_by_name('%sscales:0' %
                                                   import_scope_prefix)
    attention_with_extra_dim = sess.graph.get_tensor_by_name(
        '%sscores:0' % import_scope_prefix)
    attention = tf.reshape(attention_with_extra_dim,
                           [tf.shape(attention_with_extra_dim)[0]])

    locations, descriptors = feature_extractor.DelfFeaturePostProcessing(
        boxes, raw_descriptors, config)

    def ExtractorFn(image):
        """Receives an image and returns DELF features.

    If image is too small, returns empty set of features.

    Args:
      image: Uint8 array with shape (height, width, 3) containing the RGB image.

    Returns:
      Tuple (locations, descriptors, feature_scales, attention)
    """
        resized_image, scale_factor = ResizeImage(image, config)

        # If the image is too small, returns empty features.
        if resized_image.shape[0] < _MIN_HEIGHT or resized_image.shape[
                1] < _MIN_WIDTH:
            return np.array([]), np.array([]), np.array([]), np.array([])

        (global_feature_out, locations_out, descriptors_out,
         feature_scales_out, attention_out) = sess.run(
             [
                 global_feature, locations, descriptors, feature_scales,
                 attention
             ],
             feed_dict={
                 input_image: resized_image,
                 input_score_threshold:
                 config.delf_local_config.score_threshold,
                 input_image_scales: list(config.image_scales),
                 input_max_feature_num:
                 config.delf_local_config.max_feature_num
             })
        rescaled_locations_out = locations_out / scale_factor

        return (global_feature_out, rescaled_locations_out, descriptors_out,
                feature_scales_out, attention_out)

    return ExtractorFn
Example #27
def evaluate(config, evaluation_set='val', plot_confusionMatrix=False):

    # --------------------------------------------------------------------
    # init network
    # --------------------------------------------------------------------

    tf.compat.v1.reset_default_graph()

    # define input placeholders

    input_placeholder = {}

    input_placeholder.update(
        {'is_training': tf.compat.v1.placeholder(dtype=tf.bool, shape=())})

    if config.ARCHITECTURE == 'semantic_segmentation':
        batch_size = config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH
        # Search for available GPUs: the result is a list of device ids like `['/gpu:0', '/gpu:1']`
        devices = get_available_gpus()
        print("found devices: ", devices)
        num_GPU = len(devices)
        if (num_GPU) == 0:
            num_GPU = 1  # CPU support!
        # min 1 sample should be applied on a GPU
        if (config.BATCH_SIZE < num_GPU):
            num_GPU = config.BATCH_SIZE

        image_placeholder = []
        label_placeholder = []
        for iter in range(num_GPU):
            if (iter == (num_GPU - 1)):
                batch_size_local = batch_size - (num_GPU - 1) * (batch_size //
                                                                 num_GPU)
            else:
                batch_size_local = batch_size // num_GPU
            print('batch_size /gpu:{} : {}'.format(iter, batch_size_local))

            image_placeholder.append(
                tf.compat.v1.placeholder(
                    dtype=tf.float32,
                    shape=(batch_size_local,
                           config.DATASET_TRAIN.INPUT_SIZE[0],
                           config.DATASET_TRAIN.INPUT_SIZE[1],
                           config.DATASET_TRAIN.NUM_CHANNELS)))
            label_placeholder.append(
                tf.compat.v1.placeholder(
                    dtype=tf.float32,
                    shape=(batch_size_local,
                           config.DATASET_TRAIN.INPUT_SIZE[0],
                           config.DATASET_TRAIN.INPUT_SIZE[1], 1)))

        input_placeholder.update({'image_batch': image_placeholder})
        input_placeholder.update({'label_batch': label_placeholder})
    else:
        print(
            '[ERROR] network architecture does not exist!!! Please check your spelling!'
        )
        raise NotImplementedError

    # load network architecture

    if config.ARCHITECTURE == 'semantic_segmentation':
        model = get_model(config.MODEL)
        net = model(
            {
                'data': input_placeholder['image_batch'],
                'is_training': input_placeholder['is_training']
            },
            is_training=input_placeholder['is_training'],
            evaluation=tf.logical_not(input_placeholder['is_training']),
            #is_inference=True,
            num_classes=config.DATASET_TRAIN.NUM_CLASSES,
            filter_scale=config.FILTER_SCALE,
            timeSequence=config.TIMESEQUENCE_LENGTH,
            variant=config.MODEL_VARIANT)
    else:
        print(
            '[ERROR] network architecture does not exist!!! Please check your spelling!'
        )
        raise NotImplementedError

    # --------------------------------------------------------------------
    # determine evaluation metric
    # --------------------------------------------------------------------

    if config.ARCHITECTURE == 'semantic_segmentation':

        list_raw_gt = []
        list_pred_flattern_mIoU = []

        for iter_gpu in range(len(input_placeholder['image_batch'])):
            with tf.device('/gpu:%d' % iter_gpu):
                if config.MODEL == 'SegNet_BN' or config.MODEL == 'SegNet_BN_encoder' or config.MODEL == 'SegNet_BN_decoder' or config.MODEL == 'SegNet_BN_encoderDecoder':
                    raw_output = net.layers['output'][iter_gpu]

                    raw_output_up = tf.argmax(raw_output,
                                              axis=3,
                                              output_type=tf.int32)
                    raw_pred_mIoU = tf.expand_dims(raw_output_up, axis=3)
                else:  # ICNet
                    ori_shape = config.DATASET_TRAIN.INPUT_SIZE  #??
                    raw_output = net.layers['output'][iter_gpu]

                    raw_output_up = tf.compat.v1.image.resize_bilinear(
                        raw_output, size=ori_shape[:2], align_corners=True)
                    raw_output_up = tf.argmax(raw_output_up,
                                              axis=3,
                                              output_type=tf.int32)
                    raw_pred_mIoU = tf.expand_dims(raw_output_up, axis=3)

                # determine mIoU

                if config.USAGE_TIMESEQUENCES:  # evaluate only last image of time sequence
                    pred_of_interest = np.array(
                        range(config.BATCH_SIZE), dtype=np.int32
                    ) * config.TIMESEQUENCE_LENGTH + config.TIMESEQUENCE_LENGTH - 1
                    pred_flatten_mIoU = tf.reshape(
                        tf.gather(raw_pred_mIoU, pred_of_interest), [
                            -1,
                        ])
                    raw_gt = tf.reshape(
                        tf.gather(input_placeholder['label_batch'][iter_gpu],
                                  pred_of_interest), [
                                      -1,
                                  ])
                else:  # evaluate all images of batch size
                    pred_flatten_mIoU = tf.reshape(raw_pred_mIoU, [
                        -1,
                    ])
                    raw_gt = tf.reshape(
                        input_placeholder['label_batch'][iter_gpu], [
                            -1,
                        ])

                list_raw_gt.append(raw_gt)
                list_pred_flattern_mIoU.append(pred_flatten_mIoU)

        # combine output of different GPUs
        with tf.device('/gpu:%d' % 0):
            all_raw_gt = tf.reshape(tf.concat(list_raw_gt, -1), [
                -1,
            ])
            all_pred_flatten_mIoU = tf.reshape(
                tf.concat(list_pred_flattern_mIoU, -1), [
                    -1,
                ])

            # use the outputs concatenated across all GPUs
            indices_mIoU = tf.squeeze(
                tf.where(
                    tf.less_equal(all_raw_gt,
                                  config.DATASET_TRAIN.NUM_CLASSES - 1)), 1)
            gt_mIoU = tf.cast(tf.gather(all_raw_gt, indices_mIoU), tf.int32)
            pred_mIoU = tf.gather(all_pred_flatten_mIoU, indices_mIoU)

        mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
            pred_mIoU, gt_mIoU, num_classes=config.DATASET_VAL.NUM_CLASSES)

        # create colored image

        pred_color = decode_labels(pred_flatten_mIoU,
                                   config.DATASET_VAL.INPUT_SIZE[0:2],
                                   config.DATASET_VAL.NUM_CLASSES)

        # determine confusion matrix

        if plot_confusionMatrix:
            # Create an accumulator variable to hold the counts
            confusion = tf.Variable(tf.zeros([
                config.DATASET_VAL.NUM_CLASSES, config.DATASET_VAL.NUM_CLASSES
            ],
                                             dtype=tf.int64),
                                    name='confusion',
                                    collections=[
                                        tf.compat.v1.GraphKeys.LOCAL_VARIABLES
                                    ])
            # Compute a per-batch confusion
            batch_confusion = tf.math.confusion_matrix(
                tf.reshape(gt_mIoU, [-1]),
                tf.reshape(pred_mIoU, [-1]),
                num_classes=config.DATASET_VAL.NUM_CLASSES,
                name='batch_confusion')
            # Create the update op for doing a "+=" accumulation on the batch
            confusion_update = confusion.assign(
                confusion + tf.cast(batch_confusion, dtype=tf.int64))

    # -----------------------------------------
    # init session
    # -----------------------------------------

    # Set up tf session and initialize variables.

    sessConfig = tf.compat.v1.ConfigProto()
    sessConfig.gpu_options.allow_growth = True
    # use only a fraction of gpu memory, otherwise the TensorRT-test has not enough free GPU memory for execution
    sessConfig.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.compat.v1.Session(config=sessConfig)
    init = tf.compat.v1.global_variables_initializer()
    local_init = tf.compat.v1.local_variables_initializer()

    sess.run(init)
    sess.run(local_init)

    # load checkpoint file

    print(config.EVALUATION.MODELPATH)
    ckpt = tf.compat.v1.train.get_checkpoint_state(config.EVALUATION.MODELPATH)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.compat.v1.train.Saver(
            var_list=tf.compat.v1.global_variables())
        load(loader, sess, ckpt.model_checkpoint_path)

    else:
        print('No checkpoint file found.')

    # `sess.graph` provides access to the graph used in a `tf.Session`.
    writer = tf.compat.v1.summary.FileWriter("/tmp/tensorflow_graph",
                                             tf.compat.v1.get_default_graph())

    # --------------------------------------------------------------------
    # Evaluate - Iterate over training steps.
    # --------------------------------------------------------------------

    # evaluate training or validation set

    if evaluation_set == "val":
        imagereader_val = ImageReader(config.IMAGEREADER.VAL,
                                      config.DATASET_VAL, config.BATCH_SIZE,
                                      config.TIMESEQUENCE_LENGTH)
    elif evaluation_set == "train":
        imagereader_val = ImageReader(config.IMAGEREADER.VAL,
                                      config.DATASET_TRAIN, config.BATCH_SIZE,
                                      config.TIMESEQUENCE_LENGTH)
    elif evaluation_set == "test":
        imagereader_val = ImageReader(config.IMAGEREADER.VAL,
                                      config.DATASET_TEST, config.BATCH_SIZE,
                                      config.TIMESEQUENCE_LENGTH)
    elif evaluation_set == "all":
        imagereader_val = ImageReader(config.IMAGEREADER.VAL,
                                      config.DATASET_ALL, config.BATCH_SIZE,
                                      config.TIMESEQUENCE_LENGTH)
    else:
        print("Dataset {} does not exist!".format(evaluation_set))

    filename_memory = ""
    filename_count = 0
    average_inference_time = 0

    # --------------------------------------
    # perform evaluation - semantic segmentation
    # --------------------------------------

    if config.ARCHITECTURE == 'semantic_segmentation':
        if config.TIMESEQUENCES_SLIDINGWINDOW:  # use time sequences
            for step in trange(
                    int(imagereader_val._dataset_amount -
                        config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH + 1),
                    desc='inference',
                    leave=True):
                #start_time = time.time()

                training_batch = imagereader_val.getNextMinibatch()

                feed_dict = {input_placeholder['is_training']: False}

                for iter_GPU in range(len(input_placeholder['image_batch'])):
                    num_GPU = len(input_placeholder['image_batch'])
                    batch_size = training_batch['blob_data'].shape[0]
                    batch_size_local = batch_size // num_GPU
                    if (iter_GPU == (num_GPU - 1)):
                        batch_size_act = batch_size - (num_GPU - 1) * (
                            batch_size // num_GPU)
                    else:
                        batch_size_act = batch_size // num_GPU

                    feed_dict.update({
                        input_placeholder['image_batch'][iter_GPU]:
                        training_batch['blob_data'][iter_GPU *
                                                    batch_size_local:iter_GPU *
                                                    batch_size_local +
                                                    batch_size_act, :, :, :],
                        input_placeholder['label_batch'][iter_GPU]:
                        training_batch['blob_label']
                        [iter_GPU *
                         batch_size_local:iter_GPU * batch_size_local +
                         batch_size_act, :, :, :]
                    })

                start_time = time.time()

                if plot_confusionMatrix:
                    sess.run([update_op, confusion_update],
                             feed_dict=feed_dict)
                else:
                    sess.run([update_op], feed_dict=feed_dict)

                duration = time.time() - start_time
                average_inference_time += duration

                # save image
                prediction = sess.run([pred_color], feed_dict=feed_dict)
                predi = np.array(
                    prediction[0][0, :, :, :]).astype(dtype=np.uint8)

                img = cv2.imread(imagereader_val._dataset[step][0])
                if imagereader_val._configDataset.USE_IMAGE_ROI:
                    img = img[imagereader_val._configDataset.IMAGE_ROI_MIN_X:
                              imagereader_val._configDataset.IMAGE_ROI_MAX_X,
                              imagereader_val._configDataset.
                              IMAGE_ROI_MIN_Y:imagereader_val._configDataset.
                              IMAGE_ROI_MAX_Y, :]
                cv2.addWeighted(predi, config.INFERENCE.OVERLAPPING_IMAGE, img,
                                1 - config.INFERENCE.OVERLAPPING_IMAGE, 0, img)

                buff = imagereader_val._dataset[step][-1]
                buff = buff.split('/')
                filename = buff[-1].split('.')[0]
                if filename_memory == buff[-1].split('.')[0]:
                    filename_count += 1
                    filename = buff[-1].split('.')[0] + "_" + str(
                        filename_count) + ".png"
                else:
                    filename_memory = buff[-1].split('.')[0]
                    filename = buff[-1].split('.')[0] + "_0.png"
                    filename_count = 0

                cv2.imwrite(
                    config.INFERENCE.SAVEDIR_IMAGES + 'pred_' + filename,
                    predi[:, :, (2, 1, 0)])
                cv2.imwrite(
                    config.INFERENCE.SAVEDIR_IMAGES + 'overlay_' + filename,
                    img[:, :, (2, 1, 0)])
            # determine average time
            average_inference_time = average_inference_time / int(
                imagereader_val._dataset_amount -
                config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH + 1)
        else:  # do not use time sequences (normal evaluation)
            # accumulators for the flickering evaluation (pixels that change
            # their value between consecutive frames)
            flickering_sum = 0
            flickering_img_size = 0
            flickering_sum1 = 0
            flickering_img_size1 = 0

            for step in trange(
                    int(imagereader_val._dataset_amount /
                        (config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH)),
                    desc='inference',
                    leave=True):

                training_batch = imagereader_val.getNextMinibatch()

                feed_dict = {input_placeholder['is_training']: False}

                for iter_GPU in range(len(input_placeholder['image_batch'])):
                    num_GPU = len(input_placeholder['image_batch'])
                    batch_size = training_batch['blob_data'].shape[0]
                    batch_size_local = batch_size // num_GPU
                    if (iter_GPU == (num_GPU - 1)):
                        batch_size_act = batch_size - (num_GPU - 1) * (
                            batch_size // num_GPU)
                    else:
                        batch_size_act = batch_size // num_GPU

                    # toggle: the disabled else branch below copies the depth
                    # channel into the RGB slots so that depth-only results
                    # can be overlaid on the camera image
                    if True:
                        feed_dict.update({
                            input_placeholder['image_batch'][iter_GPU]:
                            training_batch['blob_data']
                            [iter_GPU *
                             batch_size_local:iter_GPU * batch_size_local +
                             batch_size_act, :, :, :],
                            input_placeholder['label_batch'][iter_GPU]:
                            training_batch['blob_label']
                            [iter_GPU *
                             batch_size_local:iter_GPU * batch_size_local +
                             batch_size_act, :, :, :]
                        })
                    else:
                        training_batch['blob_data'][:, :, :,
                                                    1] = training_batch[
                                                        'blob_data'][:, :, :,
                                                                     -1]
                        training_batch['blob_data'][:, :, :,
                                                    2] = training_batch[
                                                        'blob_data'][:, :, :,
                                                                     -1]

                        feed_dict.update({
                            input_placeholder['image_batch'][iter_GPU]:
                            training_batch['blob_data']
                            [iter_GPU *
                             batch_size_local:iter_GPU * batch_size_local +
                             batch_size_act, :, :, 1:4],
                            input_placeholder['label_batch'][iter_GPU]:
                            training_batch['blob_label']
                            [iter_GPU *
                             batch_size_local:iter_GPU * batch_size_local +
                             batch_size_act, :, :, :]
                        })

                start_time = time.time()

                if plot_confusionMatrix:
                    sess.run([update_op, confusion_update],
                             feed_dict=feed_dict)
                else:
                    sess.run([update_op], feed_dict=feed_dict)

                duration = time.time() - start_time

                # skip the first 50 steps (warm-up) when averaging
                if step >= 50:
                    average_inference_time += duration

                # --------------------------
                # save image
                # --------------------------

                prediction = sess.run([pred_color, raw_output_up],
                                      feed_dict=feed_dict)
                predi = np.array(
                    prediction[0][0, :, :, :]).astype(dtype=np.uint8)
                predi_id = np.array(prediction[1][-1,
                                                  ...]).astype(dtype=np.uint8)

                data_index = step * config.TIMESEQUENCE_LENGTH + config.TIMESEQUENCE_LENGTH - 1
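                # data_index addresses the last frame of the current sequence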

                buff = imagereader_val._dataset[data_index][-1]
                buff = buff.split('/')
                filename = buff[-1].split('.')[0]
                if filename_memory == buff[-1].split('.')[0]:
                    filename_count += 1
                    filename = buff[-1].split('.')[0] + "_" + str(
                        filename_count).zfill(6) + ".png"
                else:
                    filename_memory = buff[-1].split('.')[0]
                    filename = buff[-1].split('.')[0] + "_000000.png"
                    filename_count = 0

                img = cv2.imread(imagereader_val._dataset[data_index][0])
                if imagereader_val._configDataset.USE_IMAGE_ROI:
                    img = img[imagereader_val._configDataset.IMAGE_ROI_MIN_X:
                              imagereader_val._configDataset.IMAGE_ROI_MAX_X,
                              imagereader_val._configDataset.
                              IMAGE_ROI_MIN_Y:imagereader_val._configDataset.
                              IMAGE_ROI_MAX_Y, :]

                # label images for video

                cv2.addWeighted(predi, config.INFERENCE.OVERLAPPING_IMAGE, img,
                                1 - config.INFERENCE.OVERLAPPING_IMAGE, 0, img)

                cv2.imwrite(
                    config.INFERENCE.SAVEDIR_IMAGES + 'pred_' + filename,
                    predi[:, :, (2, 1, 0)])
                cv2.imwrite(
                    config.INFERENCE.SAVEDIR_IMAGES + 'overlay_' + filename,
                    img[:, :, (2, 1, 0)])

                # --------------------------
                # flickering evaluation
                # --------------------------

                # mismatch mask w.r.t. the ground truth, weighted by the label
                # class (add 1, since True * class 0 would always give 0)
                diff_img = np.array(
                    (predi_id != training_batch['blob_label'][-1, :, :, 0]),
                    np.float32) * np.array(
                        training_batch['blob_label'][-1, :, :, 0] + 1,
                        np.float32)
                diff_img1 = np.array(predi_id, np.float32)

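                # two flickering measures: diff_img tracks the error pattern
                # w.r.t. the ground truth (FP), diff_img1 tracks the raw
                # predicted class ids between consecutive frames (FIP)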
                if step > 0:  # skip step = 0, since there is no reference image
                    flickering = np.sum((diff_img != prediction_old))
                    flickering_sum += flickering
                    flickering_img_size += predi_id.shape[0] * predi_id.shape[1]

                    flickering1 = np.sum((diff_img1 != prediction_old1))
                    flickering_sum1 += flickering1
                    flickering_img_size1 += predi_id.shape[0] * predi_id.shape[
                        1]

                prediction_old = diff_img
                prediction_old1 = diff_img1

            # determine average time
            average_inference_time = average_inference_time / int(
                imagereader_val._dataset_amount /
                (config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH) - 50)

        mIoU_value = sess.run(mIoU)

        # flickering results are only computed in the non-sliding-window case
        if not config.TIMESEQUENCES_SLIDINGWINDOW:
            print('flickering_sum: ', flickering_sum)
            print('FP: ', float(flickering_sum) / float(flickering_img_size))
            print('--------------')
            print('flickering_sum1: ', flickering_sum1)
            print('FIP: ',
                  float(flickering_sum1) / float(flickering_img_size1))
            print('--------------')

        if plot_confusionMatrix:
            confusion_matrix = sess.run(confusion)

            # overall accuracy from the confusion matrix
            np.set_printoptions(linewidth=np.inf)
            acc_value = float(np.sum(np.diag(confusion_matrix))) / float(
                np.sum(confusion_matrix))
    else:
        print('[ERROR] network architecture {} does not exist! Please check '
              'your spelling!'.format(config.ARCHITECTURE))
        raise NotImplementedError

    # --------------------------------------------
    # create optimized pb-model
    # --------------------------------------------

    if config.FREEZEINFERENCEGRAPH.MODE:

        output_nodes = config.FREEZEINFERENCEGRAPH.OUTPUT_NODE_NAMES.split(',')
        input_nodes = config.FREEZEINFERENCEGRAPH.INPUT_NODE_NAMES.split(',')

        frozen_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,
            tf.compat.v1.get_default_graph().as_graph_def(), output_nodes)

        # apply the Graph Transform Tool to clean up the frozen graph
        # (remove identities, fold constants and batch norms, strip device
        # placements)
        transforms = [
            'remove_nodes(op=Identity)', 'merge_duplicate_nodes',
            'strip_unused_nodes', 'fold_constants(ignore_errors=true)',
            'fold_batch_norms', 'remove_device'
        ]

        optimized_graph_def = TransformGraph(frozen_graph_def, input_nodes,
                                             output_nodes, transforms)

        # write the optimized frozen graph to a pb file for use in C++
        output_graph_path = config.EVALUATION.MODELPATH + '/model.pb'
        with tf.io.gfile.GFile(output_graph_path, "wb") as f:
            f.write(optimized_graph_def.SerializeToString())

        print('inference model saved in ', output_graph_path)

    # ------------------------------------
    # apply TensorRT
    # ------------------------------------

    if config.FREEZEINFERENCEGRAPH.MODE:

        config_TensorRT = tf.compat.v1.ConfigProto()
        #config_TensorRT.gpu_options.per_process_gpu_memory_fraction = 0.5
        config_TensorRT.gpu_options.allow_growth = True

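        # build a mixed TF/TensorRT graph: supported subgraphs are replaced
        # by FP16 TensorRT engines, the rest stays in TensorFlow;
        # nodes_blacklist keeps the output nodes out of the engines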
        converter = trt.TrtGraphConverter(input_graph_def=optimized_graph_def,
                                          session_config=config_TensorRT,
                                          max_workspace_size_bytes=4000000000,
                                          nodes_blacklist=output_nodes,
                                          is_dynamic_op=False,
                                          precision_mode='FP16')
        converted_graph_def = converter.convert()

        # Write tensorrt graph def to pb file for use in C++
        output_graph_TensorRT_path = config.EVALUATION.MODELPATH + '/tensorrt_model.pb'
        with tf.io.gfile.GFile(output_graph_TensorRT_path, "wb") as f:
            f.write(converted_graph_def.SerializeToString())

        print('TensorRT-model saved in ', output_graph_TensorRT_path)

    # --------------------------------------------
    # close session
    # --------------------------------------------

    writer.close()
    sess.close()
    tf.compat.v1.reset_default_graph()

    # ------------------------------------------
    # define inference-function for model.pb
    # ------------------------------------------

    def determineInferenceTime(path2frozenmodel):

        # load the protobuf file from disk and parse it to retrieve the
        # unserialized graph_def
        with tf.io.gfile.GFile(path2frozenmodel, "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())

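        # rewrite stateful training-time ops (e.g. batch-norm moving-average
        # updates) that survived freezing into stateless equivalents so the
        # graph can be executed for inference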
        for node in graph_def.node:
            if node.op == 'RefSwitch':
                node.op = 'Switch'
                for index in range(len(node.input)):
                    if 'moving_' in node.input[index]:
                        node.input[index] = node.input[index] + '/read'
            elif node.op == 'AssignSub':
                node.op = 'Sub'
                if 'use_locking' in node.attr: del node.attr['use_locking']
            elif node.op == 'AssignAdd':
                node.op = 'Add'
                if 'use_locking' in node.attr: del node.attr['use_locking']
            elif node.op == 'AssignMovingAvg':
                node.op = 'MovingAvg'
                if 'use_locking' in node.attr: del node.attr['use_locking']

        # import the graph_def into a new graph; name="" keeps the original
        # op/tensor names (a non-empty name would prefix every node)
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def, name="")

        # print output node names
        if config.FREEZEINFERENCEGRAPH.PRINT_OUTPUT_NODE_NAMES:
            for op in graph.get_operations():
                print(str(op.name))

        # init network

        if config.ARCHITECTURE == 'semantic_segmentation':
            image_tensor = graph.get_tensor_by_name('Placeholder_1:0')
            output_tensor = graph.get_tensor_by_name('ArgMax:0')

        # init session. Note: nothing needs to be initialized or restored;
        # the graph contains no variables, only hard-coded constants.

        sess_inf = tf.compat.v1.Session(graph=graph)
        average_inference_time_frozenModel = 0.0

        for step in trange(
                int(imagereader_val._dataset_amount /
                    (config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH)),
                desc='inference',
                leave=True):

            # get images from the dataset

            training_batch = imagereader_val.getNextMinibatch()

            if config.ARCHITECTURE == 'semantic_segmentation':
                feed_dict = {image_tensor: training_batch['blob_data']}

            # apply inference

            start_time = time.time()

            if config.ARCHITECTURE == 'semantic_segmentation':
                sess_inf.run(output_tensor, feed_dict=feed_dict)

            duration_inf = time.time() - start_time

            # skip the first 50 steps (warm-up) when averaging
            if step >= 50:
                average_inference_time_frozenModel += duration_inf

        sess_inf.close()
        average_inference_time_frozenModel = average_inference_time_frozenModel / int(
            imagereader_val._dataset_amount /
            (config.BATCH_SIZE * config.TIMESEQUENCE_LENGTH) - 50)

        return average_inference_time_frozenModel

    # ------------------------------------------
    # test model - optimized model
    # ------------------------------------------

    if config.FREEZEINFERENCEGRAPH.MODE:

        ### apply optimized model (model.pb)

        path2frozenmodel_opt = config.EVALUATION.MODELPATH + '/model.pb'
        average_inference_time_opt = determineInferenceTime(
            path2frozenmodel_opt)

        ### apply TensorRT model (tensorrt_model.pb)

        path2frozenmodel_tensorrt = config.EVALUATION.MODELPATH + '/tensorrt_model.pb'
        average_inference_time_tensorrt = determineInferenceTime(
            path2frozenmodel_tensorrt)

        print('average time optimized model: {:.2f} ms'.format(
            average_inference_time_opt * 1000.0))
        print('average time TensorRT: {:.2f} ms'.format(
            average_inference_time_tensorrt * 1000.0))

    # --------------------------------------------------------------------
    # Show results
    # --------------------------------------------------------------------

    print('average time: {:.2f} ms'.format(average_inference_time * 1000.0))

    if plot_confusionMatrix and config.ARCHITECTURE == 'semantic_segmentation':
        # determine class-wise IoU
        buff = 0.0
        print('-------------------------------------------------------------')
        for class_id in range(config.DATASET_VAL.NUM_CLASSES):
            # avoid division by zero for classes absent from the ground truth
            if np.sum(confusion_matrix[class_id, :]) == 0:
                IoU = 0.0
            else:
                IoU = 100.0 * confusion_matrix[class_id, class_id] / (
                    np.sum(confusion_matrix[class_id, :]) +
                    np.sum(confusion_matrix[:, class_id]) -
                    confusion_matrix[class_id, class_id])
            buff = buff + IoU
            print('{}: {}'.format(config.DATASET_VAL.CLASSES[class_id], IoU))
        print('-------------------------------------------------------------')
        print('dataset: {} - {}'.format(config.DATASET_NAME,
                                        config.DATASET_WEATHER))
        print('Accuracy: {}'.format(acc_value))
        print('mIoU: {}'.format(mIoU_value))
        print('average time: {:.2f} ms'.format(average_inference_time *
                                               1000.0))
        print('-------------------------------------------------------------')
        print(confusion_matrix)
        print('-------------------------------------------------------------')
        #plot_confusion_matrix(confusion_matrix, config.DATASET_VAL.CLASSES)

    return mIoU_value
Beispiel #28
0
def inference(run,
              iterations,
              ckpt_path,
              inference_input_file,
              inference_output_file,
              hparams,
              num_workers=1,
              jobid=0,
              scope=None):
    """Perform translation."""
    if hparams.inference_indices:
        assert num_workers == 1

    model_creator = get_model_creator(hparams)
    infer_model = model_helper.create_infer_model(model_creator, hparams,
                                                  scope)
    sess, loaded_infer_model = start_sess_and_load_model(
        infer_model, ckpt_path, hparams)

    # FIXME (bryce): Set to False to disable inference from frozen graph and run fast again
    if True:
        frozen_graph = None
        with infer_model.graph.as_default():
            output_node_names = ['hash_table_Lookup_1/LookupTableFindV2']
            other_node_names = [
                'MakeIterator', 'IteratorToStringHandle', 'init_all_tables',
                'NoOp', 'dynamic_seq2seq/decoder/NoOp'
            ]
            frozen_graph = tf.graph_util.convert_variables_to_constants(
                sess,
                tf.get_default_graph().as_graph_def(),
                output_node_names=output_node_names + other_node_names)

            from tensorflow.python.compiler.tensorrt import trt_convert as trt
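            # note: max_beam_size and max_src_seq_len below are not arguments
            # of the stock TrtGraphConverter; this example assumes a patched
            # TensorFlow build that accepts them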
            converter = trt.TrtGraphConverter(
                input_graph_def=frozen_graph,
                nodes_blacklist=(output_node_names),
                is_dynamic_op=True,
                max_batch_size=hparams.infer_batch_size,
                max_beam_size=hparams.beam_width,
                max_src_seq_len=hparams.src_max_len)
            frozen_graph = converter.convert()

        with tf.Graph().as_default():
            tf.graph_util.import_graph_def(frozen_graph, name="")
            sess = tf.Session(graph=tf.get_default_graph(),
                              config=utils.get_config_proto(
                                  num_intra_threads=hparams.num_intra_threads,
                                  num_inter_threads=hparams.num_inter_threads))
            iterator = iterator_utils.BatchedInput(
                initializer=tf.get_default_graph().get_operation_by_name(
                    infer_model.iterator.initializer.name),
                source=tf.get_default_graph().get_tensor_by_name(
                    infer_model.iterator.source.name),
                target_input=None,
                target_output=None,
                source_sequence_length=tf.get_default_graph(
                ).get_tensor_by_name(
                    infer_model.iterator.source_sequence_length.name),
                target_sequence_length=None)
            infer_model = model_helper.InferModel(
                graph=tf.get_default_graph(),
                model=infer_model.model,
                src_placeholder=tf.get_default_graph().get_tensor_by_name(
                    infer_model.src_placeholder.name),
                batch_size_placeholder=tf.get_default_graph(
                ).get_tensor_by_name(infer_model.batch_size_placeholder.name),
                iterator=iterator)

    if num_workers == 1:
        single_worker_inference(run, iterations, sess, infer_model,
                                loaded_infer_model, inference_input_file,
                                inference_output_file, hparams)
    else:
        multi_worker_inference(sess,
                               infer_model,
                               loaded_infer_model,
                               inference_input_file,
                               inference_output_file,
                               hparams,
                               num_workers=num_workers,
                               jobid=jobid)
    sess.close()
import os
import shutil
import timeit

import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

org_savedmodel_dir = "output_savedmodel_dir"
tensorrt_savedmodel_dir = "converted_savedmodel_dir"

if not os.path.exists(org_savedmodel_dir):
    raise FileNotFoundError(org_savedmodel_dir)

# converter.save() expects the export directory not to exist, so only remove
# a stale copy here
if os.path.exists(tensorrt_savedmodel_dir):
    shutil.rmtree(tensorrt_savedmodel_dir)

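# convert with the converter's defaults (FP32 precision, static engines)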
converter = trt.TrtGraphConverter(input_saved_model_dir=org_savedmodel_dir)
converter.convert()
converter.save(tensorrt_savedmodel_dir)

# Evaluation of the original SavedModel
with tf.Session() as sess:
    meta_graph = tf.saved_model.loader.load(sess, [tf.saved_model.SERVING],
                                            org_savedmodel_dir)
    model_signature = meta_graph.signature_def['serving_default']
    input_signature = model_signature.inputs
    output_signature = model_signature.outputs
    start = timeit.default_timer()
    feed_dict = {
        sess.graph.get_tensor_by_name(input_signature['myInput'].name):
        mnist.test.images[:10]  # `mnist` is assumed to be loaded elsewhere
    }
    # hedged completion of the truncated original: run the signature's
    # (single) output and report the elapsed time
    output_name = list(output_signature.values())[0].name
    sess.run(sess.graph.get_tensor_by_name(output_name), feed_dict=feed_dict)
    print('original SavedModel inference: {:.4f}s'.format(
        timeit.default_timer() - start))
import timeit
import os
import shutil
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

org_savedmodel_dir = os.path.join(os.getcwd(), "savedmodel_dir_latest")
tensorrt_savedmodel_dir = os.path.join(os.getcwd(), "tensorrt_savedmodel_dir")

if not os.path.exists(org_savedmodel_dir):
    raise FileNotFoundError(org_savedmodel_dir)

# converter.save() expects the export directory not to exist, so only remove
# a stale copy here
if os.path.exists(tensorrt_savedmodel_dir):
    shutil.rmtree(tensorrt_savedmodel_dir)

converter = trt.TrtGraphConverter(
    input_saved_model_dir=org_savedmodel_dir,
    input_saved_model_signature_key="predict_images",
    #precision_mode=trt.TrtPrecisionMode.INT8,
    #use_calibration=False,
)
converter.convert()
converter.save(tensorrt_savedmodel_dir)