def check_mps_status():
    # Xavier does not have MPS
    if is_xavier():
        return False
    # Check by printing out currently running processes and grepping nvidia-cuda-mps-control.
    cmd = "ps -ef | grep nvidia-cuda-mps-control | grep -c -v grep"
    logging.debug("Checking if MPS is running with command: {:}".format(cmd))
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p.wait()
    output = p.stdout.readlines()
    return int(output[0]) >= 1
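
# Illustrative usage sketch, not part of the original harness: one way check_mps_status()
# might be used before a run. The helper name `warn_if_mps_active` is hypothetical; only
# check_mps_status() and the logging module come from the code above.
def warn_if_mps_active():
    if check_mps_status():
        logging.warning("nvidia-cuda-mps-control appears to be running; "
                        "stop MPS first if this run should not use it.")
        return True
    return False
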
def postprocess_2(self):
    # Set input dtype and format
    input_tensor = self.network.get_input(0)
    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-128, 127)
    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    # Get the layers we care about.
    nb_layers = self.network.num_layers
    logging.debug(nb_layers)
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
            i, layer.name, layer.type, self.builder_config.get_device_type(layer)))

    # Unmark the old outputs before adding the new TopK output.
    while self.network.num_outputs > 0:
        logging.info("Unmarking output: {:}".format(self.network.get_output(0).name))
        self.network.unmark_output(self.network.get_output(0))

    # Add TopK on the output of the last FC layer and mark its index tensor as the only output.
    last_fc_layer = self.network.get_layer(nb_layers - 1)
    topk_layer = self.network.add_topk(last_fc_layer.get_output(0), trt.TopKOperation.MAX, 1, 2)
    topk_layer.name = "topk_layer"
    topk_layer.get_output(0).name = "topk_layer_output_value"
    topk_layer.get_output(1).name = "topk_layer_output_index"
    self.network.mark_output(topk_layer.get_output(1))

    if self.network.num_outputs != 1:
        logging.warning("num outputs should be 1 after unmarking! Has {:}".format(
            self.network.num_outputs))
        raise Exception
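
# Illustrative sketch (hypothetical helper, not in the original file): after an engine is
# built from this network, the single remaining output is "topk_layer_output_index". This
# shows how its binding index might be looked up at inference time, assuming the
# pre-TensorRT-10 binding API (ICudaEngine.get_binding_index).
def get_topk_binding_index(engine):
    return engine.get_binding_index("topk_layer_output_index")
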
def postprocess(self, replace_relu6=False):
    nb_layers = self.network.num_layers

    # Layer preprocessing
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        logging.debug("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
            i, layer.name, layer.type, self.builder_config.get_device_type(layer)))
        if replace_relu6 and "Relu6" in layer.name:
            activation = layer
            activation.__class__ = trt.IActivationLayer
            logging.debug("\tType: {:}, alpha={:}, beta={:}".format(
                activation.type, activation.alpha, activation.beta))
            # Convert to ReLU
            if activation.type == trt.ActivationType.CLIP:
                logging.debug("\tConverting to ReLU activation")
                activation.type = trt.ActivationType.RELU

    # Connect NMS to prior box constant node
    prior_box = multipleGridAnchorGenerator(
        numLayers=6,
        minSize=0.2,
        maxSize=0.95,
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1])
    prior_box_layer = self.network.add_constant((2, 7668, 1), prior_box.astype(np.float32))
    nms_layer = next(
        self.network.get_layer(i)
        for i in range(self.network.num_layers)
        if "Postprocessor_" in self.network.get_layer(i).name)
    prior_box_input_index = next(
        i for i in range(nms_layer.num_inputs)
        if "concat_priorbox" == nms_layer.get_input(i).name)
    nms_layer.set_input(prior_box_input_index, prior_box_layer.get_output(0))

    # Assign output node
    previous_output = next(
        self.network.get_output(i)
        for i in range(self.network.num_outputs)
        if "Postprocessor" == self.network.get_output(i).name)
    self.network.unmark_output(previous_output)
    self.network.mark_output(nms_layer.get_output(0))

    # Connect NMS input to manually merged convolution layer
    for i in range(0, 6):
        tensor = mergeLocConfConv(self.network, i)
        nms_layer.set_input(i, tensor)
        nms_layer.set_input(i + 7, tensor)
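
# Illustrative sketch of where postprocess() sits in an engine-build flow, assuming the
# surrounding class holds the usual TensorRT objects (`builder`, `network`, `builder_config`)
# as the calls above suggest, and assuming the TensorRT 7/8-era build_engine API.
# `build_ssd_engine` is a hypothetical wrapper, not the harness's actual entry point.
def build_ssd_engine(benchmark_builder):
    # Rewire the prior box / NMS inputs and (optionally) turn Relu6 clips into plain ReLU
    # before building the engine.
    benchmark_builder.postprocess(replace_relu6=True)
    return benchmark_builder.builder.build_engine(
        benchmark_builder.network, benchmark_builder.builder_config)
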
def fix_layer_names(self):
    layer_name_map = {
        "resnet_model/conv2d/Conv2D": "conv1",
        "resnet_model/batch_normalization/FusedBatchNorm": "scale_conv1",
        "resnet_model/Relu": "conv1_relu",
        "resnet_model/max_pooling2d/MaxPool": "pool1",
        "Conv__128": "res2a_branch2a", "resnet_model/Relu_1": "res2a_branch2a_relu",
        "Conv__129": "res2a_branch2b", "resnet_model/Relu_2": "res2a_branch2b_relu",
        "Conv__130": "res2a_branch2c", "Conv__123": "res2a_branch1",
        "resnet_model/add": "res2a", "resnet_model/Relu_3": "res2a_relu",
        "Conv__131": "res2b_branch2a", "resnet_model/Relu_4": "res2b_branch2a_relu",
        "Conv__132": "res2b_branch2b", "resnet_model/Relu_5": "res2b_branch2b_relu",
        "Conv__133": "res2b_branch2c",
        "resnet_model/add_1": "res2b", "resnet_model/Relu_6": "res2b_relu",
        "Conv__138": "res2c_branch2a", "resnet_model/Relu_7": "res2c_branch2a_relu",
        "Conv__139": "res2c_branch2b", "resnet_model/Relu_8": "res2c_branch2b_relu",
        "Conv__140": "res2c_branch2c",
        "resnet_model/add_2": "res2c", "resnet_model/Relu_9": "res2c_relu",
        "Conv__145": "res3a_branch2a", "resnet_model/Relu_10": "res3a_branch2a_relu",
        "Conv__146": "res3a_branch2b", "resnet_model/Relu_11": "res3a_branch2b_relu",
        "Conv__147": "res3a_branch2c", "Conv__152": "res3a_branch1",
        "resnet_model/add_3": "res3a", "resnet_model/Relu_12": "res3a_relu",
        "Conv__153": "res3b_branch2a", "resnet_model/Relu_13": "res3b_branch2a_relu",
        "Conv__154": "res3b_branch2b", "resnet_model/Relu_14": "res3b_branch2b_relu",
        "Conv__155": "res3b_branch2c",
        "resnet_model/add_4": "res3b", "resnet_model/Relu_15": "res3b_relu",
        "Conv__160": "res3c_branch2a", "resnet_model/Relu_16": "res3c_branch2a_relu",
        "Conv__161": "res3c_branch2b", "resnet_model/Relu_17": "res3c_branch2b_relu",
        "Conv__162": "res3c_branch2c",
        "resnet_model/add_5": "res3c", "resnet_model/Relu_18": "res3c_relu",
        "Conv__167": "res3d_branch2a", "resnet_model/Relu_19": "res3d_branch2a_relu",
        "Conv__168": "res3d_branch2b", "resnet_model/Relu_20": "res3d_branch2b_relu",
        "Conv__169": "res3d_branch2c",
        "resnet_model/add_6": "res3d", "resnet_model/Relu_21": "res3d_relu",
        "Conv__174": "res4a_branch2a", "resnet_model/Relu_22": "res4a_branch2a_relu",
        "Conv__175": "res4a_branch2b", "resnet_model/Relu_23": "res4a_branch2b_relu",
        "Conv__176": "res4a_branch2c", "Conv__181": "res4a_branch1",
        "resnet_model/add_7": "res4a", "resnet_model/Relu_24": "res4a_relu",
        "Conv__182": "res4b_branch2a", "resnet_model/Relu_25": "res4b_branch2a_relu",
        "Conv__183": "res4b_branch2b", "resnet_model/Relu_26": "res4b_branch2b_relu",
        "Conv__184": "res4b_branch2c",
        "resnet_model/add_8": "res4b", "resnet_model/Relu_27": "res4b_relu",
        "Conv__189": "res4c_branch2a", "resnet_model/Relu_28": "res4c_branch2a_relu",
        "Conv__190": "res4c_branch2b", "resnet_model/Relu_29": "res4c_branch2b_relu",
        "Conv__191": "res4c_branch2c",
        "resnet_model/add_9": "res4c", "resnet_model/Relu_30": "res4c_relu",
        "Conv__196": "res4d_branch2a", "resnet_model/Relu_31": "res4d_branch2a_relu",
        "Conv__197": "res4d_branch2b", "resnet_model/Relu_32": "res4d_branch2b_relu",
        "Conv__198": "res4d_branch2c",
        "resnet_model/add_10": "res4d", "resnet_model/Relu_33": "res4d_relu",
        "Conv__203": "res4e_branch2a", "resnet_model/Relu_34": "res4e_branch2a_relu",
        "Conv__204": "res4e_branch2b", "resnet_model/Relu_35": "res4e_branch2b_relu",
        "Conv__205": "res4e_branch2c",
        "resnet_model/add_11": "res4e", "resnet_model/Relu_36": "res4e_relu",
        "Conv__210": "res4f_branch2a", "resnet_model/Relu_37": "res4f_branch2a_relu",
        "Conv__211": "res4f_branch2b", "resnet_model/Relu_38": "res4f_branch2b_relu",
        "Conv__212": "res4f_branch2c",
        "resnet_model/add_12": "res4f", "resnet_model/Relu_39": "res4f_relu",
        "Conv__217": "res5a_branch1", "Conv__222": "res5a_branch2a",
        "resnet_model/Relu_40": "res5a_branch2a_relu",
        "Conv__223": "res5a_branch2b", "resnet_model/Relu_41": "res5a_branch2b_relu",
        "Conv__224": "res5a_branch2c",
        "resnet_model/add_13": "res5a", "resnet_model/Relu_42": "res5a_relu",
        "Conv__225": "res5b_branch2a", "resnet_model/Relu_43": "res5b_branch2a_relu",
        "Conv__226": "res5b_branch2b", "resnet_model/Relu_44": "res5b_branch2b_relu",
        "Conv__227": "res5b_branch2c",
        "resnet_model/add_14": "res5b", "resnet_model/Relu_45": "res5b_relu",
        "Conv__232": "res5c_branch2a", "resnet_model/Relu_46": "res5c_branch2a_relu",
        "Conv__233": "res5c_branch2b", "resnet_model/Relu_47": "res5c_branch2b_relu",
        "Conv__234": "res5c_branch2c",
        "resnet_model/add_15": "res5c", "resnet_model/Relu_48": "res5c_relu",
        "resnet_model/Mean": "pool5",
        # "reshape__269": "",
        # "resnet_model/Squeeze": "",
        # "(Unnamed Layer* 123) [Shape]": "",
        # "(Unnamed Layer* 124) [Gather]": "",
        # "(Unnamed Layer* 125) [Shuffle]": "",
        # "resnet_model/dense/MatMul": "",
        # "(Unnamed Layer* 127) [Shape]": "",
        # "(Unnamed Layer* 128) [Constant]": "",
        # "(Unnamed Layer* 129) [Concatenation]": "",
        # "(Unnamed Layer* 130) [Constant]": "",
        # "(Unnamed Layer* 131) [Gather]": "",
        # "(Unnamed Layer* 132) [Shuffle]": "",
        # TODO: ONNX Parser change
        # "(Unnamed Layer* 133) [Fully Connected]": "fc1000",
        "resnet_model/dense/MatMul": "fc1000",
        # "(Unnamed Layer* 134) [Constant]": "",
        # "(Unnamed Layer* 135) [Shape]": "",
        # "(Unnamed Layer* 136) [Gather]": "",
        # "(Unnamed Layer* 137) [Shuffle]": "",
        # "resnet_model/dense/BiasAdd": "",
        # "(Unnamed Layer* 139) [Shuffle]": "",
        # "(Unnamed Layer* 140) [ElementWise]": "",
        # "resnet_model/final_dense": "",
        # "softmax_tensor": "",
        # "(Unnamed Layer* 143) [Shape]": "",
        # "(Unnamed Layer* 144) [Gather]": "",
        # "(Unnamed Layer* 145) [Constant]": "",
        # "(Unnamed Layer* 146) [Concatenation]": "",
        # "(Unnamed Layer* 147) [Shuffle]": "",
        # TODO: ONNX Parser change
        # "(Unnamed Layer* 148) [Softmax]": "prob",
        "softmax_tensor": "prob",
        # "(Unnamed Layer* 149) [Shuffle]": "",
        # "(Unnamed Layer* 150) [Shape]": "",
        # "graph_outputs_Identity__6": "",
        "ArgMax": "topk",
        # "(Unnamed Layer* 153) [Constant]": "",
        # "(Unnamed Layer* 154) [Shape]": "",
        # "(Unnamed Layer* 155) [Gather]": "",
        # "(Unnamed Layer* 156) [Shuffle]": "",
        # "graph_outputs_Identity__4": "",
    }

    # rename layers to something more sensible
    nb_layers = self.network.num_layers
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        if layer.name in layer_name_map:
            new_layer_name = layer_name_map[layer.name]
            logging.debug("Renaming Layer: {:} -> {:}".format(layer.name, new_layer_name))
            layer.name = new_layer_name
def postprocess(self, useConvForFC=False):
    # Set input dtype and format
    input_tensor = self.network.get_input(0)
    if self.input_dtype == "int8":
        input_tensor.dtype = trt.int8
        input_tensor.dynamic_range = (-128, 127)
    if self.input_format == "linear":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
    elif self.input_format == "chw4":
        input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

    # Get the layers we care about.
    nb_layers = self.network.num_layers
    logging.debug(nb_layers)
    for i in range(nb_layers):
        layer = self.network.get_layer(i)
        logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
            i, layer.name, layer.type, self.builder_config.get_device_type(layer)))

        # Detect the FC layer.
        # if "Fully Connected" in layer.name:
        if "MatMul" in layer.name:
            fc_layer = layer
            assert fc_layer.type == trt.LayerType.FULLY_CONNECTED
            fc_layer.__class__ = trt.IFullyConnectedLayer
            # Slice off class 0 so the replaced classifier has 1000 outputs.
            fc_kernel = fc_layer.kernel.reshape(1001, 2048)[1:, :]
            fc_bias = fc_layer.bias[1:]

            # (i-13)th layer should be reduction.
            reduce_layer = self.network.get_layer(i - 13)
            assert reduce_layer.type == trt.LayerType.REDUCE
            reduce_layer.__class__ = trt.IReduceLayer

            # (i-14)th layer should be the last ReLU
            last_conv_layer = self.network.get_layer(i - 14)
            assert last_conv_layer.type == trt.LayerType.ACTIVATION
            last_conv_layer.__class__ = trt.IActivationLayer

    # Unmark the old output since we are going to add new layers for the final part of the network.
    while self.network.num_outputs > 0:
        logging.info("Unmarking output: {:}".format(self.network.get_output(0).name))
        self.network.unmark_output(self.network.get_output(0))

    # Replace the reduce layer with pooling layer
    pool_layer_new = self.network.add_pooling(
        last_conv_layer.get_output(0), trt.PoolingType.AVERAGE, (7, 7))
    pool_layer_new.name = "squeeze_replaced"
    pool_layer_new.get_output(0).name = "squeeze_replaced_output"

    # Add fc layer, either as a 1x1 convolution or as a fully connected layer.
    fc_kernel = fc_kernel.flatten()
    if useConvForFC:
        fc_layer_new = self.network.add_convolution(
            pool_layer_new.get_output(0), fc_bias.size, (1, 1), fc_kernel, fc_bias)
    else:
        fc_layer_new = self.network.add_fully_connected(
            pool_layer_new.get_output(0), fc_bias.size, fc_kernel, fc_bias)
    fc_layer_new.name = "fc_replaced"
    fc_layer_new.get_output(0).name = "fc_replaced_output"

    # Add topK layer.
    topk_layer = self.network.add_topk(fc_layer_new.get_output(0), trt.TopKOperation.MAX, 1, 2)
    topk_layer.name = "topk_layer"
    topk_layer.get_output(0).name = "topk_layer_output_value"
    topk_layer.get_output(1).name = "topk_layer_output_index"

    # Mark the new output.
    self.network.mark_output(topk_layer.get_output(1))

    if self.network.num_outputs != 1:
        logging.warning("num outputs should be 1 after unmarking! Has {:}".format(
            self.network.num_outputs))
        raise Exception
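
# Illustrative usage sketch (assumptions: this method lives on a builder class that also owns
# `builder`, `network`, and `builder_config`; the flag policy shown is only an example, not the
# harness's actual logic; TensorRT 7/8-era build_engine API). useConvForFC=True swaps the fully
# connected layer for a 1x1 convolution, which can be preferable in INT8 builds.
def build_resnet50_engine(benchmark_builder):
    use_conv = (benchmark_builder.input_dtype == "int8")
    benchmark_builder.postprocess(useConvForFC=use_conv)
    return benchmark_builder.builder.build_engine(
        benchmark_builder.network, benchmark_builder.builder_config)
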