import logging
import subprocess


# is_xavier() is assumed to be provided elsewhere in the surrounding harness.
def check_mps_status():
    # Xavier does not have MPS
    if is_xavier():
        return False
    # Check by listing the running processes and grepping for nvidia-cuda-mps-control.
    cmd = "ps -ef | grep nvidia-cuda-mps-control | grep -c -v grep"
    logging.debug("Checking if MPS is running with command: {:}".format(cmd))
    p = subprocess.Popen(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    # communicate() waits for the process and reads stdout safely
    # (wait() before reading risks a deadlock if the pipe buffer fills).
    output, _ = p.communicate()
    return int(output) >= 1
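
# A minimal, hedged usage sketch: run the module directly to query MPS status.
# Assumes is_xavier() is importable from the surrounding harness.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    print("MPS control daemon running: {}".format(check_mps_status()))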
# Example #2
    def postprocess_2(self):
        # Set input dtype and format
        input_tensor = self.network.get_input(0)
        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
            input_tensor.dynamic_range = (-128, 127)
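            # (-128, 127) spans the full int8 range, so values pass through
            # with scale ~1.0; presumably the harness feeds pre-quantized
            # int8 inputs.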
        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "chw4":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)
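
        # allowed_formats is a bitmask over trt.TensorFormat values, so
        # allowing several formats would look like, e.g.:
        #   (1 << int(trt.TensorFormat.LINEAR)) | (1 << int(trt.TensorFormat.CHW4))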

        # Get the layers we care about.
        nb_layers = self.network.num_layers
        logging.debug("Network has {:} layers.".format(nb_layers))
        for i in range(nb_layers):
            layer = self.network.get_layer(i)
            logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
                i, layer.name, layer.type,
                self.builder_config.get_device_type(layer)))

        # Unmark all existing outputs; a top-k head is marked below instead.
        while self.network.num_outputs > 0:
            logging.info("Unmarking output: {:}".format(
                self.network.get_output(0).name))
            self.network.unmark_output(self.network.get_output(0))
        # Add a top-k layer on the final FC output.
        last_fc_layer = self.network.get_layer(nb_layers - 1)
        topk_layer = self.network.add_topk(last_fc_layer.get_output(0),
                                           trt.TopKOperation.MAX, 1, 2)
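        # add_topk args: k=1 with axes bitmask 2 (1 << 1) takes the single
        # maximum over the class dimension (axis 1), producing both a value
        # tensor and an index tensor.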
        topk_layer.name = "topk_layer"
        topk_layer.get_output(0).name = "topk_layer_output_value"
        topk_layer.get_output(1).name = "topk_layer_output_index"
        self.network.mark_output(topk_layer.get_output(1))

        if self.network.num_outputs != 1:
            raise Exception(
                "Expected exactly 1 network output after re-marking, "
                "found {:}".format(self.network.num_outputs))
# Example #3
    def postprocess(self, replace_relu6=False):
        nb_layers = self.network.num_layers

        # Layer preprocessing
        for i in range(nb_layers):
            layer = self.network.get_layer(i)
            logging.debug("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
                i, layer.name, layer.type,
                self.builder_config.get_device_type(layer)))

            if replace_relu6 and "Relu6" in layer.name:
                activation = layer
                activation.__class__ = trt.IActivationLayer
                logging.debug("\tType: {:}, alpha={:}, beta={:}".format(
                    activation.type, activation.alpha, activation.beta))
                # Relu6 is imported as a CLIP activation; convert it to plain ReLU.
                if activation.type == trt.ActivationType.CLIP:
                    logging.debug("\tConverting to ReLU activation")
                    activation.type = trt.ActivationType.RELU

        # Connect NMS to prior box constant node
        prior_box = multipleGridAnchorGenerator(
            numLayers=6,
            minSize=0.2,
            maxSize=0.95,
            aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
            variance=[0.1, 0.1, 0.2, 0.2],
            featureMapShapes=[19, 10, 5, 3, 2, 1])
        prior_box_layer = self.network.add_constant(
            (2, 7668, 1), prior_box.astype(np.float32))
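        # Shape (2, 7668, 1): 1917 SSD anchor boxes x 4 coordinates, with
        # row 0 holding the box parameters and row 1 the variances.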
        nms_layer = next(
            self.network.get_layer(i) for i in range(self.network.num_layers)
            if "Postprocessor_" in self.network.get_layer(i).name)
        prior_box_input_index = next(
            i for i in range(nms_layer.num_inputs)
            if "concat_priorbox" == nms_layer.get_input(i).name)
        nms_layer.set_input(prior_box_input_index,
                            prior_box_layer.get_output(0))
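
        # ILayer.set_input() rebinds an existing layer input in place, so the
        # NMS plugin now consumes the freshly generated prior boxes.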

        # Assign output node
        previous_output = next(
            self.network.get_output(i) for i in range(self.network.num_outputs)
            if "Postprocessor" == self.network.get_output(i).name)
        self.network.unmark_output(previous_output)
        self.network.mark_output(nms_layer.get_output(0))

        # Connect NMS input to manually merged convolution layer
        for i in range(0, 6):
            tensor = mergeLocConfConv(self.network, i)
            nms_layer.set_input(i, tensor)
            nms_layer.set_input(i + 7, tensor)
# Example #4
    def fix_layer_names(self):
        layer_name_map = {
            "resnet_model/conv2d/Conv2D": "conv1",
            "resnet_model/batch_normalization/FusedBatchNorm": "scale_conv1",
            "resnet_model/Relu": "conv1_relu",
            "resnet_model/max_pooling2d/MaxPool": "pool1",
            "Conv__128": "res2a_branch2a",
            "resnet_model/Relu_1": "res2a_branch2a_relu",
            "Conv__129": "res2a_branch2b",
            "resnet_model/Relu_2": "res2a_branch2b_relu",
            "Conv__130": "res2a_branch2c",
            "Conv__123": "res2a_branch1",
            "resnet_model/add": "res2a",
            "resnet_model/Relu_3": "res2a_relu",
            "Conv__131": "res2b_branch2a",
            "resnet_model/Relu_4": "res2b_branch2a_relu",
            "Conv__132": "res2b_branch2b",
            "resnet_model/Relu_5": "res2b_branch2b_relu",
            "Conv__133": "res2b_branch2c",
            "resnet_model/add_1": "res2b",
            "resnet_model/Relu_6": "res2b_relu",
            "Conv__138": "res2c_branch2a",
            "resnet_model/Relu_7": "res2c_branch2a_relu",
            "Conv__139": "res2c_branch2b",
            "resnet_model/Relu_8": "res2c_branch2b_relu",
            "Conv__140": "res2c_branch2c",
            "resnet_model/add_2": "res2c",
            "resnet_model/Relu_9": "res2c_relu",
            "Conv__145": "res3a_branch2a",
            "resnet_model/Relu_10": "res3a_branch2a_relu",
            "Conv__146": "res3a_branch2b",
            "resnet_model/Relu_11": "res3a_branch2b_relu",
            "Conv__147": "res3a_branch2c",
            "Conv__152": "res3a_branch1",
            "resnet_model/add_3": "res3a",
            "resnet_model/Relu_12": "res3a_relu",
            "Conv__153": "res3b_branch2a",
            "resnet_model/Relu_13": "res3b_branch2a_relu",
            "Conv__154": "res3b_branch2b",
            "resnet_model/Relu_14": "res3b_branch2b_relu",
            "Conv__155": "res3b_branch2c",
            "resnet_model/add_4": "res3b",
            "resnet_model/Relu_15": "res3b_relu",
            "Conv__160": "res3c_branch2a",
            "resnet_model/Relu_16": "res3c_branch2a_relu",
            "Conv__161": "res3c_branch2b",
            "resnet_model/Relu_17": "res3c_branch2b_relu",
            "Conv__162": "res3c_branch2c",
            "resnet_model/add_5": "res3c",
            "resnet_model/Relu_18": "res3c_relu",
            "Conv__167": "res3d_branch2a",
            "resnet_model/Relu_19": "res3d_branch2a_relu",
            "Conv__168": "res3d_branch2b",
            "resnet_model/Relu_20": "res3d_branch2b_relu",
            "Conv__169": "res3d_branch2c",
            "resnet_model/add_6": "res3d",
            "resnet_model/Relu_21": "res3d_relu",
            "Conv__174": "res4a_branch2a",
            "resnet_model/Relu_22": "res4a_branch2a_relu",
            "Conv__175": "res4a_branch2b",
            "resnet_model/Relu_23": "res4a_branch2b_relu",
            "Conv__176": "res4a_branch2c",
            "Conv__181": "res4a_branch1",
            "resnet_model/add_7": "res4a",
            "resnet_model/Relu_24": "res4a_relu",
            "Conv__182": "res4b_branch2a",
            "resnet_model/Relu_25": "res4b_branch2a_relu",
            "Conv__183": "res4b_branch2b",
            "resnet_model/Relu_26": "res4b_branch2b_relu",
            "Conv__184": "res4b_branch2c",
            "resnet_model/add_8": "res4b",
            "resnet_model/Relu_27": "res4b_relu",
            "Conv__189": "res4c_branch2a",
            "resnet_model/Relu_28": "res4c_branch2a_relu",
            "Conv__190": "res4c_branch2b",
            "resnet_model/Relu_29": "res4c_branch2b_relu",
            "Conv__191": "res4c_branch2c",
            "resnet_model/add_9": "res4c",
            "resnet_model/Relu_30": "res4c_relu",
            "Conv__196": "res4d_branch2a",
            "resnet_model/Relu_31": "res4d_branch2a_relu",
            "Conv__197": "res4d_branch2b",
            "resnet_model/Relu_32": "res4d_branch2b_relu",
            "Conv__198": "res4d_branch2c",
            "resnet_model/add_10": "res4d",
            "resnet_model/Relu_33": "res4d_relu",
            "Conv__203": "res4e_branch2a",
            "resnet_model/Relu_34": "res4e_branch2a_relu",
            "Conv__204": "res4e_branch2b",
            "resnet_model/Relu_35": "res4e_branch2b_relu",
            "Conv__205": "res4e_branch2c",
            "resnet_model/add_11": "res4e",
            "resnet_model/Relu_36": "res4e_relu",
            "Conv__210": "res4f_branch2a",
            "resnet_model/Relu_37": "res4f_branch2a_relu",
            "Conv__211": "res4f_branch2b",
            "resnet_model/Relu_38": "res4f_branch2b_relu",
            "Conv__212": "res4f_branch2c",
            "resnet_model/add_12": "res4f",
            "resnet_model/Relu_39": "res4f_relu",
            "Conv__217": "res5a_branch1",
            "Conv__222": "res5a_branch2a",
            "resnet_model/Relu_40": "res5a_branch2a_relu",
            "Conv__223": "res5a_branch2b",
            "resnet_model/Relu_41": "res5a_branch2b_relu",
            "Conv__224": "res5a_branch2c",
            "resnet_model/add_13": "res5a",
            "resnet_model/Relu_42": "res5a_relu",
            "Conv__225": "res5b_branch2a",
            "resnet_model/Relu_43": "res5b_branch2a_relu",
            "Conv__226": "res5b_branch2b",
            "resnet_model/Relu_44": "res5b_branch2b_relu",
            "Conv__227": "res5b_branch2c",
            "resnet_model/add_14": "res5b",
            "resnet_model/Relu_45": "res5b_relu",
            "Conv__232": "res5c_branch2a",
            "resnet_model/Relu_46": "res5c_branch2a_relu",
            "Conv__233": "res5c_branch2b",
            "resnet_model/Relu_47": "res5c_branch2b_relu",
            "Conv__234": "res5c_branch2c",
            "resnet_model/add_15": "res5c",
            "resnet_model/Relu_48": "res5c_relu",
            "resnet_model/Mean": "pool5",
            # "reshape__269": "",
            # "resnet_model/Squeeze": "",
            # "(Unnamed Layer* 123) [Shape]": "",
            # "(Unnamed Layer* 124) [Gather]": "",
            # "(Unnamed Layer* 125) [Shuffle]": "",
            # "resnet_model/dense/MatMul": "",
            # "(Unnamed Layer* 127) [Shape]": "",
            # "(Unnamed Layer* 128) [Constant]": "",
            # "(Unnamed Layer* 129) [Concatenation]": "",
            # "(Unnamed Layer* 130) [Constant]": "",
            # "(Unnamed Layer* 131) [Gather]": "",
            # "(Unnamed Layer* 132) [Shuffle]": "",
            # TODO: ONNX Parser change
            # "(Unnamed Layer* 133) [Fully Connected]": "fc1000",
            "resnet_model/dense/MatMul": "fc1000",
            # "(Unnamed Layer* 134) [Constant]": "",
            # "(Unnamed Layer* 135) [Shape]": "",
            # "(Unnamed Layer* 136) [Gather]": "",
            # "(Unnamed Layer* 137) [Shuffle]": "",
            # "resnet_model/dense/BiasAdd": "",
            # "(Unnamed Layer* 139) [Shuffle]": "",
            # "(Unnamed Layer* 140) [ElementWise]": "",
            # "resnet_model/final_dense": "",
            # "softmax_tensor": "",
            # "(Unnamed Layer* 143) [Shape]": "",
            # "(Unnamed Layer* 144) [Gather]": "",
            # "(Unnamed Layer* 145) [Constant]": "",
            # "(Unnamed Layer* 146) [Concatenation]": "",
            # "(Unnamed Layer* 147) [Shuffle]": "",
            # TODO: ONNX Parser change
            # "(Unnamed Layer* 148) [Softmax]": "prob",
            "softmax_tensor": "prob",
            # "(Unnamed Layer* 149) [Shuffle]": "",
            # "(Unnamed Layer* 150) [Shape]": "",
            # "graph_outputs_Identity__6": "",
            "ArgMax": "topk",
            # "(Unnamed Layer* 153) [Constant]": "",
            # "(Unnamed Layer* 154) [Shape]": "",
            # "(Unnamed Layer* 155) [Gather]": "",
            # "(Unnamed Layer* 156) [Shuffle]": "",
            # "graph_outputs_Identity__4": "",
        }

        # Rename layers to the Caffe-style ResNet-50 names in the map above.
        nb_layers = self.network.num_layers
        for i in range(nb_layers):
            layer = self.network.get_layer(i)

            if layer.name in layer_name_map:
                new_layer_name = layer_name_map[layer.name]
                logging.debug("Renaming Layer: {:} -> {:}".format(
                    layer.name, new_layer_name))
                layer.name = new_layer_name
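
    # A hedged verification sketch (hypothetical helper, not in the original
    # source): log the final layer names to confirm the renaming took effect.
    def dump_layer_names(self):
        for i in range(self.network.num_layers):
            logging.debug("Layer {:}: {:}".format(
                i, self.network.get_layer(i).name))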
# Example #5
    def postprocess(self, useConvForFC=False):
        # Set input dtype and format
        input_tensor = self.network.get_input(0)
        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
            input_tensor.dynamic_range = (-128, 127)
        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "chw4":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CHW4)

        # Get the layers we care about.
        nb_layers = self.network.num_layers
        logging.debug("Network has {:} layers.".format(nb_layers))
        fc_layer = None
        for i in range(nb_layers):
            layer = self.network.get_layer(i)
            logging.info("({:}) Layer '{:}' -> Type: {:} ON {:}".format(
                i, layer.name, layer.type,
                self.builder_config.get_device_type(layer)))

            # Detect the FC layer.
            # if "Fully Connected" in layer.name:
            if "MatMul" in layer.name:
                fc_layer = layer
                assert fc_layer.type == trt.LayerType.FULLY_CONNECTED
                fc_layer.__class__ = trt.IFullyConnectedLayer
                # Drop the background class (index 0) from the 1001-way
                # classifier to get the 1000 ImageNet classes.
                fc_kernel = fc_layer.kernel.reshape(1001, 2048)[1:, :]
                fc_bias = fc_layer.bias[1:]

                # (i-13)th layer should be reduction.
                reduce_layer = self.network.get_layer(i - 13)
                assert reduce_layer.type == trt.LayerType.REDUCE
                reduce_layer.__class__ = trt.IReduceLayer

                # (i-14)th layer should be the last ReLU
                last_conv_layer = self.network.get_layer(i - 14)
                assert last_conv_layer.type == trt.LayerType.ACTIVATION
                last_conv_layer.__class__ = trt.IActivationLayer

        # Unmark the old output since we are going to add new layers for the final part of the network.
        while self.network.num_outputs > 0:
            logging.info("Unmarking output: {:}".format(
                self.network.get_output(0).name))
            self.network.unmark_output(self.network.get_output(0))

        # Replace the reduce layer with a pooling layer. This relies on the
        # FC detection above having succeeded.
        assert fc_layer is not None, "FC (MatMul) layer was not found"
        pool_layer_new = self.network.add_pooling(
            last_conv_layer.get_output(0), trt.PoolingType.AVERAGE, (7, 7))
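        # (7, 7) matches ResNet-50's final feature-map size for 224x224
        # inputs, so this average pool reproduces the Mean it replaces.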
        pool_layer_new.name = "squeeze_replaced"
        pool_layer_new.get_output(0).name = "squeeze_replaced_output"

        # Add the FC layer, re-using the kernel and bias captured above.
        fc_kernel = fc_kernel.flatten()
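        # A 1x1 convolution over the 1x1x2048 pooled tensor computes the same
        # GEMM as a fully connected layer; the conv path can be friendlier to
        # certain int8 tensor formats.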
        if useConvForFC:
            fc_layer_new = self.network.add_convolution(
                pool_layer_new.get_output(0), fc_bias.size, (1, 1), fc_kernel,
                fc_bias)
        else:
            fc_layer_new = self.network.add_fully_connected(
                pool_layer_new.get_output(0), fc_bias.size, fc_kernel, fc_bias)
        fc_layer_new.name = "fc_replaced"
        fc_layer_new.get_output(0).name = "fc_replaced_output"

        # Add topK layer.
        topk_layer = self.network.add_topk(fc_layer_new.get_output(0),
                                           trt.TopKOperation.MAX, 1, 2)
        topk_layer.name = "topk_layer"
        topk_layer.get_output(0).name = "topk_layer_output_value"
        topk_layer.get_output(1).name = "topk_layer_output_index"

        # Mark the new output.
        self.network.mark_output(topk_layer.get_output(1))

        if self.network.num_outputs != 1:
            raise Exception(
                "Expected exactly 1 network output after re-marking, "
                "found {:}".format(self.network.num_outputs))