def UFF_write():
    uff.from_tensorflow_frozen_model(
        frozen_file=FROZEN_GRAPH_FILENAME,
        output_nodes=[OUTPUT_NAME],
        output_filename=OUTPUT_UFF_FILE_NAME,
        text=False,
    )
def main():
    parser = argparse.ArgumentParser(
        description="Generate UFF file from protobuf file.")
    parser.add_argument("-p", "--pb_file_name", type=str, required=True,
                        help="""A protobuf file containing a frozen tensorflow graph""")
    parser.add_argument("-u", "--uff_filename", type=str, required=True,
                        help="""Output UFF file""")
    parser.add_argument("-o", "--out_tensor_names", type=str, required=True,
                        help="""Output Tensor names""")
    args, unknown_args = parser.parse_known_args()

    out_tensor_names = [args.out_tensor_names]
    uff.from_tensorflow_frozen_model(args.pb_file_name,
                                     out_tensor_names,
                                     output_filename=args.uff_filename,
                                     text=True,
                                     quiet=False,
                                     write_preprocessed=True,
                                     debug_mode=False)
def convert_model(inference_model, output_path, output_nodes=[],
                  preprocessor=None, text=False, list_nodes=False):
    # Convert the Keras model to a frozen graph (.pb)
    orig_output_node_names = [node.op.name for node in inference_model.outputs]
    print("The output names of tensorflow graph nodes: {}".format(
        str(orig_output_node_names)))

    sess = K.get_session()
    constant_graph = graph_util.convert_variables_to_constants(
        sess,
        sess.graph.as_graph_def(),
        orig_output_node_names)

    temp_pb_path = "../temp.pb"
    graph_io.write_graph(constant_graph,
                         os.path.dirname(temp_pb_path),
                         os.path.basename(temp_pb_path),
                         as_text=False)

    predefined_output_nodes = output_nodes
    if predefined_output_nodes != []:
        trt_output_nodes = predefined_output_nodes
    else:
        trt_output_nodes = orig_output_node_names

    # Convert .pb to .uff
    uff.from_tensorflow_frozen_model(
        temp_pb_path,
        output_nodes=trt_output_nodes,
        preprocessor=preprocessor,
        text=text,
        list_nodes=list_nodes,
        output_filename=output_path,
        debug_mode=False)

    os.remove(temp_pb_path)
def to_uff(model_path, pb_model_file, uff_model_file):
    forward.self_print("to uff[%s]" % (model_path))
    with tf.Graph().as_default() as g:
        # Define the input placeholder
        # x = tf.placeholder(tf.float32, [None, forward.INPUT_NODE], name='x-input')
        x = tf.compat.v1.placeholder(
            tf.float32,
            [32, forward.NUM_CHANNELS, forward.IMAGE_SZIE, forward.IMAGE_SZIE],
            name='x-input')
        # The input is NCHW, so transpose it to NHWC
        x_image = tf.transpose(x, [0, 2, 3, 1])

        # Forward pass; the regularization loss is not needed at inference time
        y = forward.forward(x_image, False, None)
        inference = tf.add(x=0.0, y=y, name='inference')

        # Load the model with variables renamed so the forward pass does not
        # have to call the moving-average functions itself. By using
        # variables_to_restore, the shadow (moving-average) variables are
        # mapped directly onto the variables themselves, so reading a
        # variable's moving average only requires reading the variable, and
        # the same forward-pass code can be reused unchanged.
        var_averages = tf.compat.v1.train.ExponentialMovingAverage(
            train.MOVING_AVERAGE_DECAY)
        var_to_restore = var_averages.variables_to_restore()
        saver = tf.compat.v1.train.Saver(var_to_restore)

        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            if model_path:
                saver.restore(sess, model_path)
                forward.self_print("pb_model_file[%s]" % (pb_model_file))
                [save_path, save_file] = os.path.split(pb_model_file)
                graph_io.write_graph(sess.graph, save_path, "tmp_graph.pb")
                freeze_graph.freeze_graph(save_path + "/tmp_graph.pb", '',
                                          False, model_path, "inference",
                                          "save/restore_all", "save/Const:0",
                                          pb_model_file, False, "")
                # Convert to UFF
                uff.from_tensorflow_frozen_model(
                    pb_model_file,
                    output_nodes=[],
                    preprocessor=None,
                    input_node=[],
                    quiet=False,
                    text=False,
                    list_nodes=False,
                    output_filename=uff_model_file,
                    write_preprocessed=False,
                    debug_mode=False)
def main():
    args, _ = process_cmdline_args()
    if not args.quiet:
        print("Loading", args.input_file)
    uff.from_tensorflow_frozen_model(
        args.input_file,
        output_nodes=args.output_node,
        preprocessor=args.preprocessor,
        input_node=args.input_node,
        quiet=args.quiet,
        text=args.text,
        list_nodes=args.list_nodes,
        output_filename=args.output,
        write_preprocessed=args.write_preprocessed,
        debug_mode=args.debug)
def main():
    # Generate a test case for our engine
    img_input = DATA + '/VOC2012/JPEGImages/2008_000016.jpg'
    img, img_id, img_w, img_h = get_testcase(img_input)  # img in PPM format

    # Convert the model to UFF
    uff_model = uff.from_tensorflow_frozen_model(
        '/tiny-yolo-voc/tiny-yolo-graph-tf17.pb', ["22-convolutional"])

    # Convert the UFF model to a TensorRT engine
    model_parser = uffparser.create_uff_parser()
    model_parser.register_input("input", (3, 416, 416), 0)  # input name, input dims, input order
    model_parser.register_output("22-convolutional")

    # Create engine, context, and runtime
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, model_parser,
                                         MAX_BATCH_SIZE, MAX_WORKSPACE)
    assert engine
    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    context = engine.create_execution_context()
    context.set_profiler(G_PROFILER)

    if TIMEIT:
        time_inference(context, engine, 1)
    else:
        if VALIDATE:
            f = open("/tiny-yolo-voc/2012_val.txt", "r")
            for image_path in f:
                image_path = image_path.strip()
                image_jpg = image_path.split("/")[-1]
                img_input = DATA + '/VOC2012/JPEGImages/' + image_jpg
                img, img_id, img_w, img_h = get_testcase(img_input)
                # infer uses context.enqueue(): asynchronous processing with a
                # CUDA stream. TensorRT does not support profiling on this at
                # the moment.
                out = infer(context, img, OUTPUT_SIZE, 1)
                # Parse the output
                output_parser = yoloparser.yolov2parser(out, output_wd, nclass,
                                                        nbox, class_name, biases)
                result = output_parser.interpret(threshold, nms, img_w, img_h)
                save_results(img_input, result, img_w, img_h, img_id,
                             "/tiny-yolo-voc/results/")
        else:
            # infer uses context.enqueue(): asynchronous processing with a CUDA
            # stream. TensorRT does not support profiling on this at the moment.
            out = infer(context, img, OUTPUT_SIZE, 1)
            # Parse the output
            output_parser = yoloparser.yolov2parser(out, output_wd, nclass, nbox,
                                                    class_name, biases)
            result = output_parser.interpret(threshold, nms, img_w, img_h)
            save_results(img_input, result, img_w, img_h, img_id,
                         "/tiny-yolo-voc/results/")

    context.destroy()
    engine.destroy()
    runtime.destroy()
def frozenToPlan(frozen_graph_filename, plan_filename, input_name, input_height,
                 input_width, output_name, max_batch_size, max_workspace_size,
                 data_type):
    # Generate UFF from the frozen graph
    uff_model = uff.from_tensorflow_frozen_model(
        frozen_file=frozen_graph_filename,
        output_nodes=[output_name],
        output_filename=TMP_UFF_FILENAME,
        text=False)

    # Convert the frozen graph to an engine (plan)
    args = [
        TMP_UFF_FILENAME,
        plan_filename,
        input_name,
        str(input_height),
        str(input_width),
        output_name,
        str(max_batch_size),
        str(max_workspace_size),
        data_type  # float / half
    ]
    subprocess.call([UFF_TO_PLAN_EXE_PATH] + args)

    # Clean up the temporary file
    os.remove(TMP_UFF_FILENAME)
def initialize(self):
    # Create the network.
    self.network = self.builder.create_network()

    # Do graph surgery on the pb graph and convert it to UFF.
    uff_model = uff.from_tensorflow_frozen_model(
        self.model_path,
        preprocessor="code/ssd-mobilenet/tensorrt/SSDMobileNet.py")

    # Parse the UFF model and populate the network.
    parser = trt.UffParser()
    parser.register_input("Input", [3, 300, 300], trt.UffInputOrder.NCHW)
    parser.register_output("Postprocessor")
    success = parser.parse_buffer(uff_model, self.network)
    if not success:
        raise RuntimeError("SSDMobileNet network creation failed!")

    # Set the input dtype and format.
    input_tensor = self.network.get_input(0)
    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-1.0, 1.0)
    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    self.postprocess(replace_relu6=(self.dla_core is not None))
    self.initialized = True
def main(args):
    input = [args.input_placeholder]
    output = args.output_placeholders.split(',')
    dims = list(map(int, args.dimensions.split(',')))
    assert len(dims) == 3, 'Input dimensions must be given in CHW format.'

    # Convert the TensorFlow pb file to a UFF stream for TensorRT
    uff_model = uff.from_tensorflow_frozen_model(frozen_file=args.frozen_file,
                                                 input_nodes=input,
                                                 output_nodes=output)

    # Create a parser for the UFF buffer and register the input placeholder
    parser = uffparser.create_uff_parser()
    parser.register_input(args.input_placeholder, dims,
                          uffparser.UffInputOrder_kNCHW)

    # Create a TensorRT engine that is ready for immediate use.
    # For this example, we serialize it for fast instantiation later.
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         args.max_batch_size,
                                         1 << args.max_workspace_size,
                                         trt.infer.DataType.FLOAT)
    assert engine

    # Serialize the engine to the given file path
    serialize_engine(engine, args.file_path)
    engine.destroy()
def keras_to_uff(model_choice, weights):
    """
    This is important to READ. This must be done with a TensorFlow version < 2,
    in other words a TensorFlow version where graph computation was the norm.
    Because of the changes in TensorFlow 2, converting to UFF is basically
    impossible there, since the UFF conversion relies on functions and classes
    that are now DEPRECATED and REMOVED. If I'm honest, this is a PITA.

    Reference for this python file:
    https://devtalk.nvidia.com/default/topic/1028464/jetson-tx2/converting-tf-model-to-tensorrt-uff-format/
    """
    K.set_learning_phase(0)

    if model_choice == 'fastscnn':
        model = fast_scnn.model(num_classes=20, input_size=(1024, 2048, 3))
        input_size = '1024x2048'
    elif model_choice == 'deeplabv3+':
        # It is important here to set the output stride to 8 for inference
        model = deeplabv3plus.model(num_classes=20,
                                    input_size=(1024, 2048, 3),
                                    depthwise=True,
                                    output_stride=8)
        input_size = '1024x2048'
    elif model_choice == 'separable_unet':
        # Was trained on a lower resolution
        model = separable_unet.model(num_classes=20, input_size=(512, 1024, 3))
        input_size = '512x1024'

    # Whatever the model is, load the chosen weights
    model.load_weights(weights)

    # Plot the model for visual purposes in case anyone asks what you used
    tf.keras.utils.plot_model(model,
                              to_file=os.path.join('./results', model_choice,
                                                   model_choice + '.png'),
                              show_shapes=True)

    # Get the output node names
    outputs = []
    for output in model.outputs:
        outputs.append(output.name.split(":")[0])

    # Set the filename for the frozen graph
    frozen_graph = os.path.join('./results', model_choice,
                                model_choice + '_' + input_size + '.pb')

    # Let's begin
    session = K.get_session()

    # Get the graph definition and remove training nodes; ignore deprecation
    # warnings here...
    graph_def = tf.graph_util.convert_variables_to_constants(
        session, session.graph_def, outputs)
    graph_def = tf.graph_util.remove_training_nodes(graph_def)

    # Write the frozen graph to file
    with open(frozen_graph, 'wb') as f:
        f.write(graph_def.SerializeToString())

    # Get the UFF filename
    uff_filename = frozen_graph.replace('.pb', '.uff')

    # Convert and save as UFF and we're done
    uff_model = uff.from_tensorflow_frozen_model(frozen_graph, outputs,
                                                 output_filename=uff_filename)
def create_graph(self):
    uff_model = uff.from_tensorflow_frozen_model(
        self.facenet,
        ['InceptionResnetV2/Bottleneck/BatchNorm/Reshape_1'],
        list_nodes=False)
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    parser = uffparser.create_uff_parser()
    parser.register_input('input_image', (3, 160, 160), 0)
    parser.register_output('InceptionResnetV2/Bottleneck/BatchNorm/Reshape_1')

    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 31)
    parser.destroy()

    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    self.context = engine.create_execution_context()

    self.output = np.empty((1, 128), dtype=np.float32)
    self.d_input = cuda.mem_alloc(1 * 160 * 160 * 3 * 4)
    self.d_output = cuda.mem_alloc(1 * 128 * 4)
    self.bindings = [int(self.d_input), int(self.d_output)]
    print('here')
    self.stream = cuda.Stream()
def convert_to_uff(fpath):
    fpath = Path(fpath)
    pb_path = Path('%s/%s.pb' % (fpath.parent.as_posix(), fpath.stem))
    uff_path = Path('%s/%s.uff' % (fpath.parent.as_posix(), fpath.stem))
    print("Converting .pb file to .uff file, \"%s\" >> \"%s\"" % (pb_path, uff_path))

    # subprocess.run(["convert-to-uff", "--output=%s" % str(uff_path), str(pb_path)])
    import uff
    uff.from_tensorflow_frozen_model(str(pb_path), output_filename=str(uff_path))

    if os.path.exists(uff_path) and os.path.isfile(uff_path):
        print("Conversion of .pb to .uff file succeeded")
        return True
    else:
        print("Conversion of .pb to .uff file failed")
        return False
def build_engine(model_data):
    uff_model = uff.from_tensorflow_frozen_model(model_data.pb_file_path)
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = GiB(5)
        builder.fp16_mode = model_data.fp16_mode
        parser.register_input(model_data.input_name, model_data.input_shape)
        parser.register_output(model_data.output_name)
        parser.parse_buffer(uff_model, network)
        return builder.build_cuda_engine(network)
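# A minimal usage sketch for the build_engine() helper above. The ModelData
# container and all of its field values are hypothetical placeholders added
# for illustration; they do not come from the original snippet.
from collections import namedtuple

ModelData = namedtuple(
    "ModelData",
    ["pb_file_path", "input_name", "input_shape", "output_name", "fp16_mode"])

# Placeholder paths, node names, and shape for illustration only.
example_model_data = ModelData(pb_file_path="model.pb",
                               input_name="input",
                               input_shape=(3, 224, 224),
                               output_name="softmax",
                               fp16_mode=False)

example_engine = build_engine(example_model_data)
with open("model.engine", "wb") as f:
    f.write(example_engine.serialize())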
def _pb_to_uff(self, frozen_graph_path, model_info):
    # Build the input_node descriptors expected by the UFF converter:
    # "name,name,dtype,dim0,dim1,..."
    input_node = list()
    for i in model_info.inputs:
        input_node.append(",".join([
            i.name,
            i.name,
            str(np.dtype(i.dtype.as_numpy_dtype)),
            ",".join(str(i) for i in i.shape)
        ]))
    uff_model = uff.from_tensorflow_frozen_model(
        frozen_graph_path,
        [o.name for o in model_info.outputs],
        output_filename=self.uff_path,
        input_node=input_node)
    _LOGGER.info('pb_to_uff:: convert to uff success, output file: %s',
                 self.uff_path)
    return uff_model
def main():
    sess = tf.InteractiveSession()
    saver = tf.train.import_meta_graph(PATH + '.meta')
    saver.restore(sess, PATH)

    # PRED = sess.run('output/BiasAdd:0', feed_dict={'input:0': X})  # ,'hidden_state:0':np.zeros((3,X.shape[0],64))})
    # RNN, GRU OK
    HS = np.zeros((3, X.shape[0], 64))
    # LSTM
    # HS = np.zeros((3, 2, X.shape[0], 64))
    # MLP, CNN: no HS, input --> inputs, pred [:,-1,:] --> [:,:]

    start = datetime.datetime.now()
    for i in range(inference_Loop):
        PRED = sess.run('output/BiasAdd:0',
                        feed_dict={'input:0': X, 'hidden_state:0': HS})
    end = datetime.datetime.now()
    elapsed_time = (end - start).total_seconds()
    print(PRED.shape)

    # --------------------------------------------------#
    # Get output node names                              #
    # --------------------------------------------------#
    graph = sess.graph
    # print([node.name for node in graph.as_graph_def().node])
    output_node_names = [node.name for node in graph.as_graph_def().node]

    # ---------------------------------------------------------------------#
    # Make a frozen model (.pb) of the TF model in order to convert it to UFF
    # ---------------------------------------------------------------------#
    # We use a built-in TF helper to export variables to constants
    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,  # The session is used to retrieve the weights
        tf.get_default_graph().as_graph_def(),  # The graph_def is used to retrieve the nodes
        output_node_names  # The output node names are used to select the useful nodes
    )

    input_checkpoint = PATH
    # We specify the full filename of our frozen graph
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + "/frozen_model.pb"

    # Finally we serialize and dump the output graph to the filesystem
    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())
    # print("%d ops in the final graph." % len(output_graph_def.node))

    # ---------------------------------#
    # Convert the TF graph def to UFF  #
    # ---------------------------------#
    uff_model = uff.from_tensorflow_frozen_model(
        ROOT + '/frozen_model.pb',
        ['output/BiasAdd'],
        output_filename=ModelData.MODEL_FILE)
    print("yes")
def convert_tf_model_to_trt(tf_model_filename, trt_model_filename,
                            model_data_layout, input_layer_name, input_height,
                            input_width, output_layer_name, output_data_type,
                            max_workspace_size, max_batch_size):
    "Convert a tf_model_filename into a trt_model_filename using the given parameters"

    uff_model = uff.from_tensorflow_frozen_model(tf_model_filename)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

        if model_data_layout == 'NHWC':
            parser.register_input(input_layer_name,
                                  [input_height, input_width, 3],
                                  trt.UffInputOrder.NHWC)
        else:
            parser.register_input(input_layer_name,
                                  [3, input_height, input_width],
                                  trt.UffInputOrder.NCHW)
        parser.register_output(output_layer_name)

        if not parser.parse_buffer(uff_model, network):
            raise RuntimeError(
                "UFF model parsing (originally from {}) failed. Error: {}".format(
                    tf_model_filename, parser.get_error(0).desc()))

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(
                max_batch_size))
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        trt_model_object = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except:
            raise RuntimeError(
                'Cannot serialize or write TensorRT engine to file {}.'.format(
                    trt_model_filename))
def model_to_uff(model_path):
    # Transform the graph using graphsurgeon to map unsupported TensorFlow
    # operations to the appropriate TensorRT custom layer plugins
    # ==== custom
    # dynamic_graph = gs.DynamicGraph(model_path)
    # dynamic_graph.collapse_namespaces(prepare_namespace_plugin_map())

    # Save the resulting graph to a UFF file
    output_uff_path = model_path_to_uff_path(model_path)
    # uff.from_tensorflow(
    #     dynamic_graph.as_graph_def(),
    #     [ModelData.OUTPUT_NAME],
    #     output_filename=output_uff_path,
    #     text=True
    # )
    uff.from_tensorflow_frozen_model(
        model_path,
        [ModelData.OUTPUT_NAME],
        output_filename=output_uff_path,
        text=True)
    return output_uff_path
def create_and_save_inference_engine():
    INPUT_LAYERS = [config['input_layer']]
    OUTPUT_LAYERS = [config['output_layer']]
    INFERENCE_BATCH_SIZE = config['inference_batch_size']

    INPUT_C = 1
    INPUT_H = config['image_dim']
    INPUT_W = config['image_dim']

    # Load your newly created Tensorflow frozen model and convert it to UFF
    uff_model = uff.from_tensorflow_frozen_model(config['frozen_model_file'],
                                                 OUTPUT_LAYERS)

    # Now that we have a UFF model, we can generate a TensorRT engine by
    # creating a logger for TensorRT.
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    # Create a UFF parser to parse the UFF stream created from your TF frozen
    # model and identify the desired input and output nodes
    parser = uffparser.create_uff_parser()
    parser.register_input(INPUT_LAYERS[0], (INPUT_C, INPUT_H, INPUT_W), 0)
    parser.register_output(OUTPUT_LAYERS[0])

    # Build your TensorRT inference engine.
    # This step performs (1) tensor fusion (2) reduced-precision calibration
    # (3) target-specific autotuning (4) tensor memory management.
    # Pass the logger, parser, the UFF model stream, and some settings
    # (max batch size and max workspace size) to a utility function that will
    # create the engine for us.
    if config['precision'] == 'fp32':
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.FLOAT)
    elif config['precision'] == 'fp16':
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.HALF)
    elif config['precision'] == 'int8':
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.INT8)
    # engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 20)

    # Serialize the TensorRT engine to a file for when you are ready to deploy
    # your model.
    save_path = str(config['engine_save_dir']) + "tf_model_batch" \
        + str(INFERENCE_BATCH_SIZE) + "_" + str(config['precision']) + ".engine"

    trt.utils.write_engine_to_file(save_path, engine.serialize())
    print("Saved TensorRT engine to {}".format(save_path))
def main():
    # ---------------------------------#
    # Convert the TF graph def to UFF  #
    # ---------------------------------#
    uff_model = uff.from_tensorflow_frozen_model(
        PATH_pb, [ModelData.OUTPUT_NAME], output_filename=ModelData.MODEL_FILE)

    # ---------#
    # Load data#
    # ---------#
    _URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
    path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip',
                                          origin=_URL,
                                          extract=True)
    PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')

    train_dir = os.path.join(PATH, 'train')
    validation_dir = os.path.join(PATH, 'validation')
    train_cats_dir = os.path.join(train_dir, 'cats')  # directory with our training cat pictures
    train_dogs_dir = os.path.join(train_dir, 'dogs')  # directory with our training dog pictures
    validation_cats_dir = os.path.join(validation_dir, 'cats')  # directory with our validation cat pictures
    validation_dogs_dir = os.path.join(validation_dir, 'dogs')  # directory with our validation dog pictures

    # ----------------#
    # Data preparation#
    # ----------------#
    # Read images from disk, preprocess them into tensors, and set up
    # generators to convert the images into batches of tensors
    train_image_generator = ImageDataGenerator(rescale=1. / 255)  # Generator for our training data
    validation_image_generator = ImageDataGenerator(rescale=1. / 255)  # Generator for our validation data

    # Apply rescaling and resize the images
    train_data_gen = train_image_generator.flow_from_directory(
        batch_size=batch_size,
        directory=train_dir,
        shuffle=True,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        class_mode='binary')
    val_data_gen = validation_image_generator.flow_from_directory(
        batch_size=batch_size,
        directory=validation_dir,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        class_mode='binary')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_graph',
                        default='frozen_inference_graph.pb',
                        help='Frozen .pb graph to be converted')
    parser.add_argument('-o', '--output_graph',
                        default='frozen_inference_graph.uff',
                        help='Output .uff graph')
    args = parser.parse_args()

    _ = uff.from_tensorflow_frozen_model(args.input_graph,
                                         output_nodes=['NMS'],
                                         preprocessor='./preprocessor.py',
                                         output_filename=args.output_graph)
def create_and_save_inference_engine():
    # Define network parameters, including inference batch size and the name &
    # dimensionality of the input/output layers
    INPUT_LAYERS = [config['input_layer']]
    OUTPUT_LAYERS = [config['out_layer']]
    INFERENCE_BATCH_SIZE = config['inference_batch_size']

    INPUT_C = 3
    INPUT_H = config['image_dim']
    INPUT_W = config['image_dim']

    # Load your newly created Tensorflow frozen model and convert it to UFF
    uff_model = uff.from_tensorflow_frozen_model(config['frozen_model_file'],
                                                 OUTPUT_LAYERS)

    # Create a UFF parser to parse the UFF stream created from your TF frozen model
    parser = uffparser.create_uff_parser()
    parser.register_input(INPUT_LAYERS[0], (INPUT_C, INPUT_H, INPUT_W), 0)
    parser.register_output(OUTPUT_LAYERS[0])

    # Build your TensorRT inference engine
    if config['precision'] == 'fp32':
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.FLOAT)
    elif config['precision'] == 'fp16':
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                             INFERENCE_BATCH_SIZE, 1 << 20,
                                             trt.infer.DataType.HALF)

    # Serialize the TensorRT engine to a file for when you are ready to deploy your model.
    save_path = str(config['engine_save_dir']) + "keras_vgg19_b" \
        + str(INFERENCE_BATCH_SIZE) + "_" + str(config['precision']) + ".engine"

    trt.utils.write_engine_to_file(save_path, engine.serialize())
    print("Saved TRT engine to {}".format(save_path))
def main():
    tf_freeze_model = 'car_series/frozen_graph.pb'
    input_node = 'input'
    out_node = 'InceptionV4/Logits/Predictions'

    uff_model = uff.from_tensorflow_frozen_model(tf_freeze_model, [out_node])

    # Convert the TensorFlow model to a TensorRT engine
    parser = uffparser.create_uff_parser()
    parser.register_input(input_node, (CHANNEL, INPUT_H, INPUT_W), 0)
    parser.register_output(out_node)
    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCHSIZE, MAX_WORKSPACE)
    trt.utils.write_engine_to_file("car_series/car_series_tensorrt.engine",
                                   engine.serialize())
def _pb_uff_parser(pb_dir, network, input_node_names, input_node_shapes,
                   output_node_names):
    parser = trt.UffParser()

    # Register the network inputs and outputs with the parser
    for input_node_name, input_node_shape in zip(input_node_names,
                                                 input_node_shapes):
        parser.register_input(input_node_name, input_node_shape)
    for output_node_name in output_node_names:
        parser.register_output(output_node_name)

    uff_buffer = uff.from_tensorflow_frozen_model(frozen_file=pb_dir,
                                                  output_nodes=output_node_names,
                                                  output_filename='buffer.uff',
                                                  text=False,
                                                  debug_mode=True)
    parser.parse_buffer(uff_buffer, network)
    os.remove('buffer.uff')
    return network
def main():
    MAX_WORKSPACE = 1 << 30
    MAX_BATCHSIZE = 1

    # If the output_filename argument is given the call returns None;
    # otherwise it returns the serialized UFF model data.
    uff_model = uff.from_tensorflow_frozen_model(
        frozen_model_path,
        frozen_node_name)  # , output_filename=UFF_PATH, text=True, list_nodes=True)

    parser = uffparser.create_uff_parser()
    # 0 means the input channel order is NCHW, 1 means NHWC
    parser.register_input(frozen_input_name, NET_INPUT_IMAGE_SHAPE, 0)
    parser.register_output(frozen_node_name[0])

    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                         MAX_BATCHSIZE, MAX_WORKSPACE)
    # Save the engine
    trt.utils.write_engine_to_file(ENGINE_PATH, engine.serialize())
    assert engine
    # parser.destroy()

    context = engine.create_execution_context()

    print("\n| TEST CASE | PREDICTION |")
    pair = imgTestData[0]
    correct = 0
    for img, label in pair:
        output = infer(context, img, 1)
        # The frozen graph outputs logits, so convert them to softmax here
        softmax = np.exp(output) / np.sum(np.exp(output))
        predict = np.argmax(softmax)
        if int(label) == predict:
            correct += 1
        print("|-------|--------|--------------------------------------------------------")
        print("| " + str(label) + " | " + str(predict) + " | " +
              str(['{:.2f}%'.format(i * 100) for i in softmax]) + " ")

    accuracy = correct / len(pair)
    print("Accuracy = ", accuracy)
def main():
    args = parse_args()

    # Convert pb to uff
    uff_model = uff.from_tensorflow_frozen_model(args.pb_path,
                                                 [args.output_node])

    # Create the UFF parser and logger
    parser = uffparser.create_uff_parser()
    INPUT_SIZE = [3, args.image_size, args.image_size]
    parser.register_input(args.input_node, INPUT_SIZE, 0)
    parser.register_output(args.output_node)
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

    # Convert uff to plan
    if args.calib_images_dir:
        calibration_files = [os.path.join(args.calib_images_dir, i)
                             for i in os.listdir(args.calib_images_dir)]
    else:
        calibration_files = []
    batchstream = ImageBatchStream(args.max_batch_size, calibration_files,
                                   INPUT_SIZE)
    int8_calibrator = PythonEntropyCalibrator([args.input_node], batchstream)

    if args.int8:
        engine = trt.utils.uff_to_trt_engine(G_LOGGER,
                                             uff_model,
                                             parser,
                                             args.max_batch_size,
                                             args.max_workspace,
                                             datatype=trt.infer.DataType.INT8,
                                             calibrator=int8_calibrator)
    else:
        engine = trt.utils.uff_to_trt_engine(G_LOGGER,
                                             uff_model,
                                             parser,
                                             args.max_batch_size,
                                             args.max_workspace)

    trt.utils.write_engine_to_file(args.engine_path, engine.serialize())
def convert_uff_from_frozen_model(frozen_graph, model_output,
                                  dest_path='/tmp', dest_name='converted.uff'):
    """Convert a frozen model file to a UFF-format model.

    This function converts directly from a frozen model file, i.e. the output
    of freeze_graph(), so the frozen file contains the serialized GraphDef
    with the variable data folded in as constants.

    Args:
        frozen_graph: The frozen model file (*.pb)
        model_output: The output node name(s) of the model
        dest_path: The destination path of the output UFF file
        dest_name: The name of the UFF file

    Returns:
        None
    """
    uff_model = uff.from_tensorflow_frozen_model(frozen_graph, model_output)
    with open(os.path.join(dest_path, dest_name), 'wb') as dump:
        dump.write(uff_model)
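# A minimal usage sketch for the helper above. The frozen-graph path and the
# output node name are hypothetical placeholders, not values from the
# original snippet.
convert_uff_from_frozen_model('/tmp/frozen_model.pb',
                              ['dense_2/Softmax'],
                              dest_path='/tmp',
                              dest_name='model.uff')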
def create_graph(self):
    uff_model = uff.from_tensorflow_frozen_model(
        self.model_file, ['InceptionResnetV2/Logits/Predictions'])
    G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

    parser = uffparser.create_uff_parser()
    parser.register_input('input_image', (3, 512, 512), 0)
    parser.register_output('InceptionResnetV2/Logits/Predictions')

    engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 32)
    parser.destroy()

    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    self.context = engine.create_execution_context()

    self.output = np.empty(len(self.id2name), dtype=np.float32)
    self.d_input = cuda.mem_alloc(1 * 512 * 512 * 3 * 4)
    self.d_output = cuda.mem_alloc(1 * len(self.id2name) * 4)
    self.bindings = [int(self.d_input), int(self.d_output)]
    self.stream = cuda.Stream()
def create_engine(name, model_path, height, width, input_layer='image',
                  output_layer='Openpose/concat_stage7', half16=False):
    if not os.path.exists(name):
        # Load your newly created Tensorflow frozen model and convert it to UFF
        # import pdb; pdb.set_trace()
        uff_model = uff.from_tensorflow_frozen_model(
            model_path, [output_layer])  # , output_filename='mobilepose.uff')
        dump = open(name.replace('engine', 'uff'), 'wb')
        dump.write(uff_model)
        dump.close()

        # Create a UFF parser to parse the UFF stream created from your TF frozen model
        parser = uffparser.create_uff_parser()
        parser.register_input(input_layer, (3, height, width), 0)
        parser.register_output(output_layer)

        # Build your TensorRT inference engine.
        # This step performs (1) tensor fusion (2) reduced precision
        # (3) target autotuning (4) tensor memory management
        engine = trt.utils.uff_to_trt_engine(
            G_LOGGER,
            uff_model,
            parser,
            1,
            1 << 20,
            datatype=trt.infer.DataType.FLOAT
            if not half16 else trt.infer.DataType.HALF)
        trt.utils.write_engine_to_file(name, engine.serialize())
    else:
        engine = trt.utils.load_engine(G_LOGGER, name)
    return engine
checkpoint_path = saver.save(sess, snapshot_dir, global_step=0,
                             latest_filename='checkpoint_state')
inference_graph = sess.graph

graph_def_file = 'graphdef.pb'
graph_io.write_graph(inference_graph, '.', graph_def_file)

in_names = nn_test.inputs[0].op.name
out_names = nn_test.outputs[0].op.name
print('Input name:', in_names, 'Output name:', out_names)

frozen_model_file = 'frozen.pb'
freeze_graph.freeze_graph(graph_def_file, "", False, checkpoint_path,
                          out_names, "save/restore_all", "save/Const:0",
                          frozen_model_file, False, "")

### Parse the network via UFF ###
uff_model = uff.from_tensorflow_frozen_model('frozen.pb', [out_names])

parser = uffparser.create_uff_parser()
parser.register_input(in_names, (3, 32, 32), 0)
parser.register_output(out_names)

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 256, 1 << 20,
                                     trt.infer.DataType.HALF)
import pycuda.autoinit
import numpy as np
from random import randint  # generate a random test case
from PIL import Image
from matplotlib.pyplot import imshow  # to show the test case
import time

# import system tools
import os
import sys

import uff

OUTPUT_NAMES = "MatMul"
tf_model = '/home/lee/models/frozen_model.pb'
uff_model = uff.from_tensorflow_frozen_model(tf_model, [OUTPUT_NAMES])
'''
G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)

parser = uffparser.create_uff_parser()
parser.register_input("Placeholder", (3, 240, 320), 0)
parser.register_output("OUTPUT_NAMES")

engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 31)

checkpoint = tf.train.get_checkpoint_state('')
input_checkpoint = checkpoint.model_checkpoint_path
import tensorrt as trt
import uff
from tensorrt.parsers import uffparser

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

uff_model = uff.from_tensorflow_frozen_model("final.pb", ["dense_2/Softmax"])

INFERENCE_BATCH_SIZE = 256
parser = uffparser.create_uff_parser()
parser.register_input("conv2d_1_input", (1, 28, 28), 0)
parser.register_output("dense_2/Softmax")

engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser,
                                     INFERENCE_BATCH_SIZE, 1 << 20,
                                     trt.infer.DataType.FLOAT)
trt.utils.write_engine_to_file("test_engine.engine", engine.serialize())
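# A short follow-up sketch: reloading the engine serialized above with the
# same legacy trt.utils helper used in the create_engine() snippet earlier.
# The filename matches the write_engine_to_file() call above.
engine = trt.utils.load_engine(G_LOGGER, "test_engine.engine")
context = engine.create_execution_context()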