def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    if trt.__version__[0] < '7':
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into a TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(gs.DynamicGraph(spec['input_pb']), model, spec)
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=spec['tmp_uff'],
                            text=True,
                            debug_mode=DEBUG_UFF)

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
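# Usage sketch (not part of the original script): once main() has written the
# serialized engine, it can be reloaded without redoing the UFF conversion.
# The path is illustrative; plugins must be registered before deserializing.
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
with open('ssd_mobilenet_v2_coco.bin', 'rb') as f, \
        trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()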
def graphsurgeon_cleanup(LOG_DIR,
                         input_model_name='output_model.pb',
                         cleaned_model_name='output_model_aftersurgery.pb'):
    """Loads the TensorFlow frozen graph and cleans it up with NVIDIA's graphsurgeon."""
    assert os.path.isfile(LOG_DIR + '/' + input_model_name), \
        "[graphsurgeon_cleanup] The .pb file=" + str(input_model_name) + " does not exist"

    import graphsurgeon as gs
    print(tcol.HEADER, '[graphsurgeon_cleanup] graphsurgeon.__version__',
          gs.__version__, tcol.ENDC)

    DG = gs.DynamicGraph()
    print(tcol.OKGREEN,
          '[graphsurgeon_cleanup] READ tensorflow Graph using graphsurgeon.DynamicGraph:',
          LOG_DIR + '/' + input_model_name, tcol.ENDC)
    DG.read(LOG_DIR + '/' + input_model_name)

    # Remove control-flow (Switch) nodes first.
    all_switch = DG.find_nodes_by_op('Switch')
    DG.forward_inputs(all_switch)

    print('Write (after graph surgery):', LOG_DIR + '/' + cleaned_model_name)
    DG.write(LOG_DIR + '/' + cleaned_model_name)

    if not os.path.isdir(LOG_DIR + '/graphsurgeon_cleanup'):
        os.mkdir(LOG_DIR + '/graphsurgeon_cleanup')
    DG.write_tensorboard(LOG_DIR + '/graphsurgeon_cleanup')

    print(tcol.HEADER, '[graphsurgeon_cleanup] END', tcol.ENDC)
def main():
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into a TensorRT engine
    model = 'ssd_mobilenet_v2_coco'
    spec = MODEL_SPECS[model]
    if not os.path.exists(spec['tmp_uff']):
        dynamic_graph = add_plugin(gs.DynamicGraph(spec['input_pb']), spec)
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                        output_nodes=['NMS'],
                                        output_filename=spec['tmp_uff'],
                                        text=True,
                                        debug_mode=DEBUG_UFF)

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        print("Building TensorRT engine. This may take a few minutes.")
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
        print("Saved engine.")
def create_trt_model_bin():
    ctypes.CDLL(LIB_FLATTEN_PATH)

    # initialize
    trt_logger = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(trt_logger, '')

    # compile the model into a TensorRT engine
    if not os.path.isfile(MODEL_TRT_BIN_PATH):
        dynamic_graph = model.add_plugin(gs.DynamicGraph(MODEL_PATH))
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                        model.output_name,
                                        output_filename='tmp.uff')

        with trt.Builder(trt_logger) as builder, \
                builder.create_network() as network, \
                trt.UffParser() as parser:
            builder.max_workspace_size = 1 << 28
            builder.max_batch_size = 1
            builder.fp16_mode = True

            parser.register_input('Input', model.dims)
            parser.register_output('MarkOutput_0')
            parser.parse('tmp.uff', network)
            engine = builder.build_cuda_engine(network)

            buf = engine.serialize()
            with open(MODEL_TRT_BIN_PATH, 'wb') as f:
                f.write(buf)
def export_trt(pb_file, output_dir, num_classes=90, neuralet_adaptive_model=1):
    """Exports a TensorFlow .pb model to a TensorRT engine.

    Args:
        pb_file: The path of the input .pb file
        output_dir: A directory to store the output files
        num_classes: The detector's number of classes
    """
    lib_flatten_concat_file = "exporters/libflattenconcat.so.6"
    # initialize
    if trt.__version__[0] < '7':
        ctypes.CDLL(lib_flatten_concat_file)
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into a TensorRT engine
    model = "ssd_mobilenet_v2_coco"
    if not os.path.isfile(pb_file):
        raise FileNotFoundError('model does not exist under: {}'.format(pb_file))

    if not os.path.isdir(output_dir):
        print("the provided output directory {0} does not exist".format(output_dir))
        print("creating output directory {0}".format(output_dir))
        os.makedirs(output_dir, exist_ok=True)

    dynamic_graph = plugin.add_plugin_and_preprocess(gs.DynamicGraph(pb_file),
                                                     model,
                                                     num_classes,
                                                     neuralet_adaptive_model)

    model_file_name = ".".join((pb_file.split("/")[-1]).split(".")[:-1])
    uff_path = os.path.join(output_dir, model_file_name + ".uff")
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=uff_path,
                            text=True,
                            debug_mode=False)
    input_dims = (3, 300, 300)

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            builder.create_builder_config() as builder_config, \
            trt.UffParser() as parser:
        builder_config.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder_config.set_flag(trt.BuilderFlag.FP16)

        parser.register_input('Input', input_dims)
        parser.register_output('MarkOutput_0')
        parser.parse(uff_path, network)
        engine = builder.build_engine(network, builder_config)

        buf = engine.serialize()
        engine_path = os.path.join(output_dir, model_file_name + ".bin")
        with open(engine_path, 'wb') as f:
            f.write(buf)
        print("your model has been converted to a TRT engine successfully under: {}"
              .format(engine_path))
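# Usage sketch for export_trt() above; the paths are illustrative
# placeholders, not files shipped with the exporter.
export_trt(pb_file="detectors/ssd_mobilenet_v2_coco/frozen_inference_graph.pb",
           output_dir="exported_engines",
           num_classes=90)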
def model_to_uff(model_path):
    # Transform the graph using graphsurgeon to map unsupported TensorFlow
    # operations to the appropriate TensorRT custom layer plugins.
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph.collapse_namespaces(prepare_namespace_plugin_map())

    # Save the resulting graph to a UFF file.
    output_uff_path = model_path_to_uff_path(model_path)
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        [ModelData.OUTPUT_NAME],
                        output_filename=output_uff_path,
                        text=True)
    return output_uff_path
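# prepare_namespace_plugin_map() is defined elsewhere in the source file.
# As a sketch of its expected shape only (the node names and plugin fields
# here are illustrative, mirroring the SSD snippets later in this section,
# not the original helper):
import graphsurgeon as gs
import tensorflow as tf

def prepare_namespace_plugin_map():
    # Map TensorFlow namespaces onto TensorRT plugin nodes; collapsing a
    # namespace replaces every node under it with the given plugin node.
    input_plugin = gs.create_plugin_node(name="Input", op="Placeholder",
                                         dtype=tf.float32,
                                         shape=[1, 3, 300, 300])
    return {
        "Preprocessor": input_plugin,
        "image_tensor": input_plugin,
    }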
def model_to_uff(model_path, uff_model_path):
    print("model_path:", model_path)
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ModelParser.convert_unsupported_nodes_to_plugins(dynamic_graph)

    if not os.path.exists(uff_model_path):
        uff.from_tensorflow(
            dynamic_graph.as_graph_def(),
            [DetectionModel.output_name],
            output_filename=uff_model_path,
            text=True
        )
def model_to_uff(model_path, output_uff_path, silent=False):
    """Takes a frozen .pb graph, converts it to .uff and saves it to file.

    Args:
        model_path (str): .pb model path
        output_uff_path (str): .uff path where the UFF file will be saved
        silent (bool): if False, writes progress messages to stdout
    """
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        [ModelData.OUTPUT_NAME],
                        output_filename=output_uff_path,
                        text=True)
def main():
    config = configparser.ConfigParser()
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True)
    args = parser.parse_args()
    config.read(args.config)
    lib_flatten_concat_file = config['LIBFLATTENCONCAT']['Path']

    # initialize
    if trt.__version__[0] < '7':
        ctypes.CDLL(lib_flatten_concat_file)
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into a TensorRT engine
    model = config['MODEL']['Name']
    model_path = config['MODEL']['Input']
    url = config['MODEL']['DownloadPath']
    if not os.path.isfile(model_path):
        print('model does not exist under:', model_path, 'downloading from', url)
        wget.download(url, model_path)

    dynamic_graph = plugin.add_plugin_and_preprocess(
        gs.DynamicGraph(config['MODEL']['Input']), model, config)
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=config['MODEL']['TmpUff'],
                            text=True,
                            debug_mode=False)
    input_dims = tuple(
        [int(x) for x in config['MODEL']['InputDims'].split(',')])

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', input_dims)
        parser.register_output('MarkOutput_0')
        parser.parse(config['MODEL']['TmpUff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(config['MODEL']['OutputBin'], 'wb') as f:
            f.write(buf)
def prepare_model(model=InceptionV2,
                  trt_engine_datatype=trt.DataType.FLOAT,
                  batch_size=1,
                  calib_dataset=Path(__file__).parent / 'VOCdevkit' / 'VOC2007' / 'JPEGImages'):
    import uff
    from . import calibrator

    if not model.PATH.exists():
        # initialize
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')
        runtime = trt.Runtime(TRT_LOGGER)

        # compile the model into a TensorRT engine
        dynamic_graph = gs.DynamicGraph(str(model.TF_PATH))
        dynamic_graph = model.add_plugin(dynamic_graph)
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                        model.OUTPUT_NAME,
                                        output_filename='tmp.uff')

        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network() as network, \
                trt.UffParser() as parser:
            builder.max_workspace_size = 1 << 30
            builder.max_batch_size = batch_size
            if trt_engine_datatype == trt.DataType.HALF:
                builder.fp16_mode = True
            elif trt_engine_datatype == trt.DataType.INT8:
                # TODO: download data if it doesn't exist
                # TODO: use DLA
                builder.fp16_mode = True
                builder.int8_mode = True
                builder.int8_calibrator = calibrator.SSDEntropyCalibrator(
                    data_dir=calib_dataset,
                    cache_file=Path(__file__).parent / 'INT8CacheFile')

            parser.register_input('Input', model.INPUT_SHAPE)
            parser.register_output('MarkOutput_0')
            parser.parse('tmp.uff', network)
            engine = builder.build_cuda_engine(network)

            # save engine
            buf = engine.serialize()
            with open(model.PATH, 'wb') as f:
                f.write(buf)
            Path('tmp.uff').unlink()
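# Usage sketch for prepare_model() above: precision is selected via
# trt_engine_datatype; INT8 additionally runs SSDEntropyCalibrator over the
# calibration image directory configured in the signature.
import tensorrt as trt

prepare_model(trt_engine_datatype=trt.DataType.HALF)
# prepare_model(trt_engine_datatype=trt.DataType.INT8, batch_size=8)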
def convert_to_tensorrt(args, input_dims, graph_chars=None):
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    input_dims_corrected = (input_dims[3], input_dims[1], input_dims[2])
    graph = add_plugin(gs.DynamicGraph(args.input), input_dims_corrected,
                       graph_chars=graph_chars)
    print(graph.find_nodes_by_name("image_tensor"))
    try:
        uff.from_tensorflow(graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=(args.output_dir + ".uff"),
                            text=args.debug,
                            write_preprocessed=args.debug,
                            debug_mode=args.debug)
    except TypeError as e:
        if str(e) == "Cannot convert value 0 to a TensorFlow DType.":
            raise EnvironmentError(
                "Please modify your graphsurgeon package according to the following:\n"
                "https://github.com/AastaNV/TRT_object_detection#update-graphsurgeon-converter"
            )

    if args.no_cuda:
        exit(0)

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', input_dims_corrected)
        parser.register_output('MarkOutput_0')
        parser.parse(args.output_dir + ".uff", network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(args.output_dir + '_tensorrt.bin', 'wb') as f:
            f.write(buf)
def build_engine(cls, trt_logger, batch_size,
                 calib_dataset=Path.home() / 'VOCdevkit' / 'VOC2007' / 'JPEGImages'):
    import graphsurgeon as gs
    import uff
    from . import calibrator

    # compile the model into a TensorRT engine
    dynamic_graph = gs.DynamicGraph(str(cls.MODEL_PATH))
    dynamic_graph = cls.add_plugin(dynamic_graph)
    uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                    [cls.OUTPUT_NAME], quiet=True)

    with trt.Builder(trt_logger) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = batch_size
        logging.info('Building engine with batch size: %d', batch_size)
        logging.info('This may take a while...')

        if builder.platform_has_fast_fp16:
            builder.fp16_mode = True
        if builder.platform_has_fast_int8:
            builder.int8_mode = True
            builder.int8_calibrator = calibrator.SSDEntropyCalibrator(
                cls.INPUT_SHAPE,
                data_dir=calib_dataset,
                cache_file=Path(__file__).parent / f'{cls.__name__}_calib_cache')

        parser.register_input('Input', cls.INPUT_SHAPE)
        parser.register_output('MarkOutput_0')
        parser.parse_buffer(uff_model, network)
        engine = builder.build_cuda_engine(network)
        if engine is None:
            return None
        logging.info("Completed creating Engine")

        with open(cls.ENGINE_PATH, 'wb') as engine_file:
            engine_file.write(engine.serialize())
        return engine
def model_to_uff(model_path, output_uff_path, silent=False):
    """Takes a frozen .pb graph, converts it to .uff and saves it to file.

    Args:
        model_path (str): .pb model path
        output_uff_path (str): .uff path where the UFF file will be saved
        silent (bool): if False, writes progress messages to stdout
    """
    # Build the dynamic graph; DynamicGraph can search and modify a
    # TensorFlow GraphDef.
    dynamic_graph = gs.DynamicGraph(model_path)
    # ssd_unsupported_nodes_to_plugin_nodes (implemented in this file)
    # rewrites the graph, replacing layers TensorRT does not support with
    # custom plugin nodes.
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)
    # Perform the actual conversion.
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        [ModelData.OUTPUT_NAME],
                        output_filename=output_uff_path,
                        text=True)
def convert_to_uff(model, frozen_filename, uff_filename):
    # First freeze the graph and remove training nodes.
    output_names = model.output.op.name
    # output_names = "dense_2/MatMul"
    sess = get_session()
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [output_names])
    frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)

    # Save the frozen model.
    with open(frozen_filename, "wb") as fptr:
        fptr.write(frozen_graph.SerializeToString())
    tf.io.write_graph(
        sess.graph_def,
        '/home/codesteller/workspace/ml_workspace/trt_ws/trt-custom-plugin/saved_model/frozen_model',
        'train.pbtxt', as_text=True)
    print_graphdef(
        tf.get_default_graph().as_graph_def(),
        '/home/codesteller/workspace/ml_workspace/trt_ws/'
        'trt-custom-plugin/saved_model/frozen_model/train.txt')

    # Transform the graph using graphsurgeon to map unsupported TensorFlow
    # operations to appropriate TensorRT custom layer plugins.
    dynamic_graph = gs.DynamicGraph(frozen_graph)
    create_plugin_node(dynamic_graph)
    print_dynamic_graph(
        dynamic_graph,
        filename='/home/codesteller/workspace/ml_workspace/trt_ws/trt-custom-plugin/'
                 'saved_model/frozen_model/final_node_graph.txt')

    uff_model = uff.from_tensorflow(dynamic_graph, [output_names])
    with open(uff_filename, "wb") as fptr:
        fptr.write(uff_model)
def convert(self):
    dynamic_graph = self.add_plugin(gs.DynamicGraph(self.spec['input_pb']),
                                    self.spec)
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=self.spec['tmp_uff'],
                            text=True,
                            debug_mode=DEBUG_UFF)

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', self.inputDims)
        parser.register_output('MarkOutput_0')
        parser.parse(self.spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(self.spec['output_bin'], 'wb') as f:
            f.write(buf)
#!/usr/bin/env python3
import graphsurgeon as gs
import tensorflow as tf
import tensorrt as trt
import uff

if __name__ == "__main__":
    data_type = trt.DataType.HALF
    # data_type = trt.DataType.FLOAT
    output_node = "test_model/model/logits/linear/BiasAdd"
    input_node = "test_model/model/images/truediv"
    graph_pb = "optimized_tRT.pb"
    engine_file = "sample.engine"

    dynamic_graph = gs.DynamicGraph(graph_pb)

    # replace LeakyRelu with the LReLU_TRT plugin
    nodes = [n.name for n in dynamic_graph.as_graph_def().node]
    ns = {}
    for node in nodes:
        if "LeakyRelu" in node:
            ns[node] = gs.create_plugin_node(name=node, op="LReLU_TRT",
                                             negSlope=0.1)
    dynamic_graph.collapse_namespaces(ns)

    # convert to UFF
    uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                    output_nodes=[output_node])

    # convert to TRT
    G_LOGGER = trt.Logger(trt.Logger.ERROR)
    trt.init_libnvinfer_plugins(G_LOGGER, "")
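    # The snippet above ends right after plugin initialization. A plausible
    # continuation (a sketch following the builder pattern used throughout
    # this section, not the original script's tail; the input shape is an
    # assumption):
    with trt.Builder(G_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_batch_size = 1
        builder.max_workspace_size = 1 << 28
        if data_type == trt.DataType.HALF:
            builder.fp16_mode = True
        parser.register_input(input_node, (3, 256, 256))  # illustrative dims
        parser.register_output(output_node)
        parser.parse_buffer(uff_model, network)
        engine = builder.build_cuda_engine(network)
        with open(engine_file, "wb") as f:
            f.write(engine.serialize())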
def ssd_pipeline_to_uff(checkpoint_path, config_path, tmp_dir='exported_model'):
    import graphsurgeon as gs
    from object_detection import exporter
    import tensorflow as tf
    import uff

    # TODO(@jwelsh): Implement by extending model builders with TensorRT
    # plugin stubs. Currently, this method uses pattern matching, which is
    # a bit hacky and subject to fail when the TF object detection API
    # exporter changes. We should add object detection as a submodule to
    # avoid versioning incompatibilities.
    config = _load_config(config_path)
    frozen_graph_path = os.path.join(tmp_dir, FROZEN_GRAPH_NAME)

    # get input shape
    channels = 3
    height = config.model.ssd.image_resizer.fixed_shape_resizer.height
    width = config.model.ssd.image_resizer.fixed_shape_resizer.width

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # export checkpoint and config to frozen graph
    with tf.Session(config=tf_config) as tf_sess:
        with tf.Graph().as_default() as tf_graph:
            subprocess.call(['mkdir', '-p', tmp_dir])
            exporter.export_inference_graph('image_tensor', config,
                                            checkpoint_path, tmp_dir,
                                            input_shape=[1, None, None, 3])

    dynamic_graph = gs.DynamicGraph(frozen_graph_path)

    # remove all assert nodes
    # all_assert_nodes = dynamic_graph.find_nodes_by_op("Assert")
    # dynamic_graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)

    # forward all identity nodes
    all_identity_nodes = dynamic_graph.find_nodes_by_op("Identity")
    dynamic_graph.forward_inputs(all_identity_nodes)

    # create input plugin
    input_plugin = gs.create_plugin_node(name=TRT_INPUT_NAME,
                                         op="Placeholder",
                                         dtype=tf.float32,
                                         shape=[1, height, width, channels])

    # create anchor box generator
    anchor_generator_config = config.model.ssd.anchor_generator.ssd_anchor_generator
    box_coder_config = config.model.ssd.box_coder.faster_rcnn_box_coder
    priorbox_plugin = gs.create_plugin_node(
        name="priorbox",
        op="GridAnchor_TRT",
        minSize=anchor_generator_config.min_scale,
        maxSize=anchor_generator_config.max_scale,
        aspectRatios=list(anchor_generator_config.aspect_ratios),
        variance=[
            1.0 / box_coder_config.y_scale, 1.0 / box_coder_config.x_scale,
            1.0 / box_coder_config.height_scale,
            1.0 / box_coder_config.width_scale
        ],
        featureMapShapes=_get_feature_map_shape(config),
        numLayers=config.model.ssd.anchor_generator.ssd_anchor_generator.num_layers)

    # create nms plugin
    nms_config = config.model.ssd.post_processing.batch_non_max_suppression
    nms_plugin = gs.create_plugin_node(
        name=TRT_OUTPUT_NAME,
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=nms_config.score_threshold,
        nmsThreshold=nms_config.iou_threshold,
        topK=nms_config.max_detections_per_class,
        keepTopK=nms_config.max_total_detections,
        numClasses=config.model.ssd.num_classes + 1,  # add background
        inputOrder=[1, 2, 0],
        confSigmoid=1,
        isNormalized=1,
        scoreConverter="SIGMOID",
        codeType=3)

    priorbox_concat_plugin = gs.create_node("priorbox_concat",
                                            op="ConcatV2",
                                            dtype=tf.float32,
                                            axis=2)
    boxloc_concat_plugin = gs.create_plugin_node(
        "boxloc_concat",
        op="FlattenConcat_TRT_jetbot",
        dtype=tf.float32,
    )
    boxconf_concat_plugin = gs.create_plugin_node(
        "boxconf_concat",
        op="FlattenConcat_TRT_jetbot",
        dtype=tf.float32,
    )

    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": priorbox_plugin,
        "Postprocessor": nms_plugin,
        "Preprocessor": input_plugin,
        "ToFloat": input_plugin,
        "image_tensor": input_plugin,
        "Concatenate": priorbox_concat_plugin,
        "concat": boxloc_concat_plugin,
        "concat_1": boxconf_concat_plugin
    }

    dynamic_graph.collapse_namespaces(namespace_plugin_map)

    # fix up the NMS node's inputs: drop the direct reference to the input node
    for i, name in enumerate(dynamic_graph.find_nodes_by_op('NMS_TRT')[0].input):
        if TRT_INPUT_NAME in name:
            dynamic_graph.find_nodes_by_op('NMS_TRT')[0].input.pop(i)

    dynamic_graph.remove(dynamic_graph.graph_outputs,
                         remove_exclusive_dependencies=False)

    uff_buffer = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                     [TRT_OUTPUT_NAME])
    return uff_buffer
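# Usage sketch (illustrative paths): ssd_pipeline_to_uff() returns a
# serialized UFF buffer rather than writing a file, so a caller persists
# it explicitly.
uff_buffer = ssd_pipeline_to_uff('model.ckpt', 'pipeline.config')
with open('ssd_model.uff', 'wb') as f:
    f.write(uff_buffer)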
import os

import uff
import tensorrt as trt
import graphsurgeon as gs

import coco  # assumed local module providing COCO_CLASSES_LIST
import config as model

# ctypes.CDLL("lib/libflattenconcat.so.6")
COCO_LABELS = coco.COCO_CLASSES_LIST

# initialize
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)

# compile the model into TensorRT
if not os.path.isfile(model.TRTbin):
    dynamic_graph = model.add_plugin(gs.DynamicGraph(model.path))
    uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                    model.output_name,
                                    output_filename='tmp.uff')

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', model.dims)
        parser.register_output('MarkOutput_0')
        parser.parse('tmp.uff', network)
        engine = builder.build_cuda_engine(network)
#!/usr/bin/env python3
import graphsurgeon as gs
import tensorflow as tf
import uff

if __name__ == "__main__":
    # USER DEFINED VALUES
    output_nodes = ["embeddings"]
    input_node = "input"
    pb_file = "./facenet.pb"
    uff_file = "./facenetModels/facenet.uff"
    # END USER DEFINED VALUES

    # read the tensorflow graph
    # NOTE: make sure to freeze and optimize the graph first
    # (remove training nodes, etc.)
    dynamic_graph = gs.DynamicGraph(pb_file)
    nodes = [n.name for n in dynamic_graph.as_graph_def().node]

    ns = {}
    for node in nodes:
        # replace LeakyRelu with the default TRT plugin LReLU_TRT
        if "LeakyRelu" in node:
            ns[node] = gs.create_plugin_node(name=node, op="LReLU_TRT",
                                             negSlope=0.1)
        # replace Maximum with the L2Norm_Helper_TRT max operation (CUDA's fmaxf)
        # if node == "orientation/l2_normalize/Maximum":
        if node == "embeddings/Maximum":
            ns[node] = gs.create_plugin_node(name=node,
                                             op="L2Norm_Helper_TRT",
                                             op_type=0,
                                             eps=1e-12)
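    # The snippet above ends before the substitutions are applied. A
    # plausible continuation (a sketch following the same pattern as the
    # LeakyRelu script earlier in this section, not the original file's
    # tail):
    dynamic_graph.collapse_namespaces(ns)
    uff.from_tensorflow(dynamic_graph.as_graph_def(),
                        output_nodes=output_nodes,
                        output_filename=uff_file,
                        text=True)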
def test_cam(args):
    """Function to predict for a camera image stream"""
    ctypes.CDLL("../TRT_object_detection/lib/libflattenconcat.so")
    COCO_LABELS = coco.COCO_CLASSES_LIST

    # initialize
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)

    # compile the model into TensorRT
    if not os.path.isfile(model.TRTbin):
        dynamic_graph = model.add_plugin(gs.DynamicGraph(model.path))
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                        model.output_name,
                                        output_filename='tmp.uff')

        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network() as network, \
                trt.UffParser() as parser:
            builder.max_workspace_size = 1 << 28
            builder.max_batch_size = 1
            builder.fp16_mode = True

            parser.register_input('Input', model.dims)
            parser.register_output('MarkOutput_0')
            parser.parse('tmp.uff', network)
            engine = builder.build_cuda_engine(network)

            buf = engine.serialize()
            with open(model.TRTbin, 'wb') as f:
                f.write(buf)

    # create engine
    with open(model.TRTbin, 'rb') as f:
        buf = f.read()
        engine = runtime.deserialize_cuda_engine(buf)

    # create buffers
    host_inputs = []
    cuda_inputs = []
    host_outputs = []
    cuda_outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        host_mem = cuda.pagelocked_empty(size, np.float32)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)

        bindings.append(int(cuda_mem))
        if engine.binding_is_input(binding):
            host_inputs.append(host_mem)
            cuda_inputs.append(cuda_mem)
        else:
            host_outputs.append(host_mem)
            cuda_outputs.append(cuda_mem)
    context = engine.create_execution_context()

    image_queue = LifoQueue()
    depth_result_queue = LifoQueue()
    # object_result_queue = LifoQueue()
    cuda_lock = Lock()

    # Initialize and start threads for object detection and depth inference
    # object_detection_thread = ObstacleDetectionThread(image_queue, object_result_queue)
    depth_inference_thread = DepthInferenceThread(image_queue,
                                                  depth_result_queue,
                                                  cuda_lock, args)

    # Initialize the camera to capture an image stream.
    # Change the value to 0 when using the default camera.
    video_stream = WebcamVideoStream(src=args.webcam).start()

    if not args.no_display:
        print("Trying to initialize DisplayImage()")
        # Object to display images
        image_display = DisplayImage(not args.no_process)
        print("Finished initializing DisplayImage()")

    # Flag that records when 'q' is pressed to break out of the inference loop below
    quit_inference = False

    def on_release(key):
        if key == keyboard.KeyCode.from_char('q'):
            nonlocal quit_inference
            quit_inference = True
            return False

    keyboard.Listener(on_release=on_release).start()
    print("Finished starting keyboard listener")

    # object_detection_thread.start()
    depth_inference_thread.start()
    print("Started depth_inference_thread")

    # finished = True
    disp_resized = None
    danger_level = None
    original_width = 640
    original_height = 480

    # Number of frames to capture to calculate fps
    num_frames = 5
    curr_time = np.zeros(num_frames)

    with torch.no_grad():
        print("Starting inference loop")
        while True:
            if quit_inference:
                if args.no_display:
                    print('-> Done')
                break

            # Capture and send the frame to the depth inference thread to be processed
            frame = video_stream.read()
            copy_frame = frame
            # if finished:
            print("Sent image to depth thread")
            image_queue.put(copy_frame)
            #     finished = False
            # else:
            #     print("Still doing last frame")

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (model.dims[2], model.dims[1]))
            image = (2.0 / 255.0) * image - 1.0
            image = image.transpose((2, 0, 1))
            np.copyto(host_inputs[0], image.ravel())

            start_time = time.time()
            print("Right before copying inputs, acquiring lock")
            try:
                cuda_lock.acquire()
                print("Object acquired lock")
                cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
                print("Right before execute")
                context.execute_async(bindings=bindings,
                                      stream_handle=stream.handle)
                print("Finished execute")
                cuda.memcpy_dtoh_async(host_outputs[1], cuda_outputs[1], stream)
                print("Finished copying outputs")
                cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
                print("Finished copying outputs 2")
                stream.synchronize()
                print("Synchronized stream")
                cuda_lock.release()
                print("Object released lock")
                print("execute times " + str(time.time() - start_time))
            except:
                print("Object couldn't acquire lock, skipping")
                continue

            output = host_outputs[0]
            height, width, channels = frame.shape
            for i in range(int(len(output) / model.layout)):
                prefix = i * model.layout
                index = int(output[prefix + 0])
                label = int(output[prefix + 1])
                conf = output[prefix + 2]
                xmin = int(output[prefix + 3] * width)
                ymin = int(output[prefix + 4] * height)
                xmax = int(output[prefix + 5] * width)
                ymax = int(output[prefix + 6] * height)

                if conf > 0.7:
                    print("Detected {} with confidence {}".format(
                        COCO_LABELS[label], "{0:.0%}".format(conf)))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (0, 0, 255), 3)
                    cv2.putText(frame, COCO_LABELS[label],
                                (xmin + 10, ymin + 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 1,
                                (255, 255, 255), 2, cv2.LINE_AA)

            # Calculate the fps
            curr_time[1:] = curr_time[:-1]
            curr_time[0] = time.time()
            fps = num_frames / (curr_time[0] - curr_time[len(curr_time) - 1])

            print("Requesting depth thread to send data back")
            # Receive results from threads
            # frame = None
            print("Requesting obstacle thread to send data back")
            # detections, frame = object_result_queue.get()
            try:
                disp_resized, danger_level = depth_result_queue.get()
                # finished = True
            except:
                print("Didn't get frame from depth thread -- still working")
            # print(f"Detections: {detections}")
            print(danger_level)

            original_width = 640
            original_height = 480
            if not args.no_display and disp_resized is not None:
                print("About to use image_display")
                # DISPLAY: generate a color-mapped depth image
                image_display.display(frame, disp_resized, fps,
                                      original_width, original_height,
                                      blended=not args.no_blend)
                # if frame is not None:
                #     cv2.imshow("Object detection", frame)
                # else:
                #     continue
                # cv2.waitKey(1)
            else:
                print(f"FPS: {fps}")

    # When everything is done, stop the camera stream
    video_stream.stop()
    depth_inference_thread.join()
def test_cam(args):
    """Function to predict for an image stream"""
    # Determine where to run inference
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Download the model given in args if it doesn't exist
    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # Extract the height and width of the images this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    print("-> Loading complete, initializing the camera")

    # Get coco labels
    ctypes.CDLL("../TRT_object_detection/lib/libflattenconcat.so")
    COCO_LABELS = coco.COCO_CLASSES_LIST

    # initialize
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)

    # compile the model into TensorRT
    if not os.path.isfile(model.TRTbin):
        dynamic_graph = model.add_plugin(gs.DynamicGraph(model.path))
        uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                        model.output_name,
                                        output_filename='tmp.uff')

        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network() as network, \
                trt.UffParser() as parser:
            builder.max_workspace_size = 1 << 28
            builder.max_batch_size = 1
            builder.fp16_mode = True

            parser.register_input('Input', model.dims)
            parser.register_output('MarkOutput_0')
            parser.parse('tmp.uff', network)
            engine = builder.build_cuda_engine(network)

            buf = engine.serialize()
            with open(model.TRTbin, 'wb') as f:
                f.write(buf)

    # create engine
    with open(model.TRTbin, 'rb') as f:
        buf = f.read()
        engine = runtime.deserialize_cuda_engine(buf)

    # create buffers
    host_inputs = []
    cuda_inputs = []
    host_outputs = []
    cuda_outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        host_mem = cuda.pagelocked_empty(size, np.float32)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)

        bindings.append(int(cuda_mem))
        if engine.binding_is_input(binding):
            host_inputs.append(host_mem)
            cuda_inputs.append(cuda_mem)
        else:
            host_outputs.append(host_mem)
            cuda_outputs.append(cuda_mem)
    context = engine.create_execution_context()

    if not args.no_display:
        # Object to display images
        image_display = DisplayImage(not args.no_process)

    # Flag that records when 'q' is pressed to break out of the inference loop below
    quit_inference = False

    # Listener for keyboard presses; updates quit_inference
    def on_release(key):
        if key == keyboard.KeyCode.from_char('q'):
            nonlocal quit_inference
            quit_inference = True
            return False

    # Initialize listener
    keyboard.Listener(on_release=on_release).start()

    status_socket_thread = SocketStatusThread()
    status_socket_thread.start()
    image_stream_thread = ImageStreamThread()
    image_stream_thread.start()

    # Number of frames to capture to calculate fps
    num_frames = 5
    curr_time = np.zeros(num_frames)

    with torch.no_grad():
        while True:
            if quit_inference:
                if args.no_display:
                    image_stream_thread.stop()
                print('-> Done')
                break

            frame = image_stream_thread.read_frame()

            # Calculate the fps
            curr_time[1:] = curr_time[:-1]
            curr_time[0] = time.time()
            fps = num_frames / (curr_time[0] - curr_time[len(curr_time) - 1])

            # Do depth inference
            disp_resized, danger_level, danger_side, original_width, original_height = predict_depth(
                frame, feed_width, feed_height, device, encoder, depth_decoder)

            # Only do object detection if the danger level is above 0
            # (i.e. Careful or Dangerous)
            print(f"Danger level: {danger_level}")
            detections_str = ""
            if danger_level > 0:
                detections = detect_objects(frame, host_inputs, host_outputs,
                                            cuda_inputs, cuda_outputs,
                                            bindings, stream, context,
                                            COCO_LABELS)
                # Only sending back detections in the region where depth seems close
                # detections = detections_dict[danger_side]
                detections_str = '\n' + '\n'.join('$'.join(map(str, obj))
                                                  for obj in detections)
                print(str(detections))
                print(f"Detections: {detections_str}")

            # Construct a string with the danger level and END signal.
            # Separate each piece (i.e. danger level, each detection, END)
            # with a newline so the client socket knows where each item ends.
            result = str(danger_level) + "\n" + danger_side + detections_str + "\nEND\n"
            print("Sending result...")
            image_stream_thread.send_result(result)

            if not args.no_display:
                # Generate a color-mapped depth image and display it alongside
                # the original frame (and blended, if chosen)
                disp_resized_np = disp_resized.squeeze().cpu().detach().numpy()
                image_display.display(frame, disp_resized_np, fps,
                                      original_width, original_height,
                                      blended=not args.no_blend)
                cv2.waitKey(1)
            else:
                print(f"FPS: {fps}")

    print("Outside of with statement")
    image_stream_thread.stop()
def from_tensorflow(graphdef, output_nodes=[], preprocessor=None, **kwargs):
    """
    Converts a TensorFlow GraphDef to a UFF model.

    Args:
        graphdef (tensorflow.GraphDef): The TensorFlow graph to convert.
        output_nodes (list(str)): The names of the outputs of the graph.
            If not provided, graphsurgeon is used to automatically deduce
            output nodes.
        output_filename (str): The UFF file to write.
        preprocessor (str): The path to a preprocessing script that will be
            executed before the converter. This script should define a
            ``preprocess`` function which accepts a graphsurgeon DynamicGraph
            and modifies it in place.
        write_preprocessed (bool): If set to True, the converter will write
            out the preprocessed graph as well as a TensorBoard visualization.
            Must be used in conjunction with output_filename.
        text (bool): If set to True, the converter will also write out a
            human readable UFF file. Must be used in conjunction with
            output_filename.
        quiet (bool): If set to True, suppresses informational messages.
            Errors may still be printed.
        list_nodes (bool): If set to True, the converter displays a list of
            all nodes present in the graph.
        debug_mode (bool): If set to True, the converter prints verbose debug
            messages.
        return_graph_info (bool): If set to True, this function returns the
            graph input and output nodes in addition to the serialized UFF
            graph.

    Returns:
        serialized UFF MetaGraph (str)

        OR, if return_graph_info is set to True,

        serialized UFF MetaGraph (str), graph inputs (list(tensorflow.NodeDef)),
        graph outputs (list(tensorflow.NodeDef))
    """
    quiet = False
    input_node = []
    text = False
    list_nodes = False
    output_filename = None
    write_preprocessed = False
    debug_mode = False
    return_graph_info = False
    for k, v in kwargs.items():
        if k == "quiet":
            quiet = v
        elif k == "input_node":
            input_node = v
        elif k == "text":
            text = v
        elif k == "list_nodes":
            list_nodes = v
        elif k == "output_filename":
            output_filename = v
        elif k == "write_preprocessed":
            write_preprocessed = v
        elif k == "debug_mode":
            debug_mode = v
        elif k == "return_graph_info":
            return_graph_info = v

    tf_supported_ver = "1.12.0"
    if not quiet:
        print("NOTE: UFF has been tested with TensorFlow " +
              str(tf_supported_ver) +
              ". Other versions are not guaranteed to work")
    if tf.__version__ != tf_supported_ver:
        print("WARNING: The version of TensorFlow installed on this system "
              "is not guaranteed to work with UFF.")

    try:
        import graphsurgeon as gs
    except ImportError as err:
        raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have graphsurgeon installed.
For installation instructions, see:
https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python
and click on the 'TensorRT Python API' link""".format(err))

    # Create a dynamic graph so we can adjust it as needed.
    dynamic_graph = gs.DynamicGraph(graphdef)
    # Always remove assert ops.
    assert_nodes = dynamic_graph.find_nodes_by_op("Assert")
    dynamic_graph.remove(assert_nodes, remove_exclusive_dependencies=True)

    # Now, run the preprocessor, if provided.
    if preprocessor:
        import importlib
        import sys
        # Temporarily insert this working dir into the sys.path
        sys.path.insert(0, os.path.dirname(preprocessor))
        # Import and execute!
        pre = importlib.import_module(
            os.path.splitext(os.path.basename(preprocessor))[0])
        pre.preprocess(dynamic_graph)
        # Now clean up, by removing the directory from the system path.
        del sys.path[0]

    # Run process_dilated_conv() and process_softmax() so the user doesn't have to.
    gs.extras.process_dilated_conv(dynamic_graph)
    gs.extras.process_softmax(dynamic_graph)

    # Get the modified graphdef back.
    graphdef = dynamic_graph.as_graph_def()

    if write_preprocessed and output_filename:
        preprocessed_output_name = os.path.splitext(
            output_filename)[0] + "_preprocessed"
        dynamic_graph.write(preprocessed_output_name + ".pb")
        dynamic_graph.write_tensorboard(preprocessed_output_name)
        if not quiet:
            print("Preprocessed graph written to " +
                  preprocessed_output_name + ".pb")
            print("TensorBoard visualization written to " +
                  preprocessed_output_name)

    if not quiet:
        print("UFF Version " + uff.__version__)
    if debug_mode:
        _debug_print("Debug Mode is ENABLED")

    if not input_node:
        if not quiet:
            print("=== Automatically deduced input nodes ===")
            print(str(dynamic_graph.graph_inputs))
            print("=========================================\n")

    # Deduce the likely graph outputs if none are provided
    if not output_nodes:
        output_nodes = [node.name for node in dynamic_graph.graph_outputs]
        if not quiet:
            print("=== Automatically deduced output nodes ===")
            print(str(dynamic_graph.graph_outputs))
            print("==========================================\n")

    if list_nodes:
        for i, node in enumerate(graphdef.node):
            print('%i %s: "%s"' % (i + 1, node.op, node.name))
        return

    for i, name in enumerate(output_nodes):
        if debug_mode:
            _debug_print("Enumerating outputs")
        output_nodes[i] = tf2uff.convert_node_name_or_index_to_name(
            name, graphdef.node, debug_mode=debug_mode)
        if not quiet:
            print("Using output node", output_nodes[i])

    input_replacements = {}
    for i, name_data in enumerate(input_node):
        name, new_name, dtype, shape = name_data.split(',', 3)
        name = tf2uff.convert_node_name_or_index_to_name(
            name, graphdef.node, debug_mode=debug_mode)
        if new_name == '':
            new_name = name
        dtype = np.dtype(dtype)
        shape = [int(x) for x in shape.split(',')]
        input_replacements[name] = (new_name, dtype, shape)
        if not quiet:
            print("Using input node", name)

    if not quiet:
        print("Converting to UFF graph")

    uff_metagraph = uff.model.MetaGraph()
    tf2uff.add_custom_descriptors(uff_metagraph)
    uff_graph = tf2uff.convert_tf2uff_graph(
        graphdef,
        uff_metagraph,
        output_nodes=output_nodes,
        input_replacements=input_replacements,
        name="main",
        debug_mode=debug_mode)

    uff_metagraph_proto = uff_metagraph.to_uff()
    if not quiet:
        print('No. nodes:', len(uff_graph.nodes))

    if output_filename:
        with open(output_filename, 'wb') as f:
            f.write(uff_metagraph_proto.SerializeToString())
        if not quiet:
            print("UFF Output written to", output_filename)
        if text:
            # ASK: Would you want to return the prototxt?
            if not output_filename:
                raise ValueError(
                    "Requested prototxt but did not provide file path")
            output_filename_txt = _replace_ext(output_filename, '.pbtxt')
            with open(output_filename_txt, 'w') as f:
                f.write(str(uff_metagraph.to_uff(debug=True)))
            if not quiet:
                print("UFF Text Output written to", output_filename_txt)

    # Always return the UFF graph!
    if return_graph_info:
        return (uff_metagraph_proto.SerializeToString(),
                dynamic_graph.graph_inputs, dynamic_graph.graph_outputs)
    else:
        return uff_metagraph_proto.SerializeToString()
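# Usage sketch of from_tensorflow() above: convert an in-memory GraphDef
# and hand the returned buffer straight to the UFF parser, skipping the
# .uff file. The path, node names, and dims are illustrative.
import graphsurgeon as gs
import tensorrt as trt
import uff

dynamic_graph = gs.DynamicGraph("frozen_inference_graph.pb")
uff_buffer = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                                 output_nodes=["NMS"], quiet=True)

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network() as network, \
        trt.UffParser() as parser:
    parser.register_input("Input", (3, 300, 300))
    parser.register_output("MarkOutput_0")
    parser.parse_buffer(uff_buffer, network)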
"concat_1": concat_box_conf } # Create a new graph by collapsing namespaces ssd_graph.collapse_namespaces(namespace_plugin_map) # Remove the outputs, so we just have a single output node (NMS). # If remove_exclusive_dependencies is True, the whole graph will be removed! ssd_graph.remove(ssd_graph.graph_outputs, remove_exclusive_dependencies=False) ssd_graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input") return ssd_graph """Takes frozen .pb graph, converts it to .uff and saves it to file. Args: model_path (str): .pb model path output_uff_path (str): .uff path where the UFF file will be saved silent (bool): if False, writes progress messages to stdout """ model_path = 'frozen_inference_graph.pb' output_uff_path = 'frozen_inference_graph.uff' dynamic_graph = gs.DynamicGraph(model_path) dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph) uff.from_tensorflow(dynamic_graph.as_graph_def(), [ModelData.OUTPUT_NAME], output_filename=output_uff_path, text=True)