Example #1
0
 def refit_engine(self, net):
     """Refit ``self.engine`` in place with the weights currently in ``net``.

     Iterates ``self.refit_weight_dict`` (TensorRT layer name -> description
     dict), looks the referenced tensors up in ``net.state_dict()`` and
     passes them to a ``trt.Refitter``. Descriptions are dispatched on
     ``desc["type"]``: ``"Linear"``, ``"Convolution"`` or ``"BatchNorm"``.

     Args:
         net: Object whose ``state_dict()`` maps the names stored in
             ``self.refit_weight_dict`` to tensors supporting
             ``detach().cpu().numpy()``.

     Raises:
         NotImplementedError: A description carries an unsupported type.
         AssertionError: The refitter still reports missing weights, or
             ``refit_cuda_engine()`` reports failure. Raised explicitly
             (not via ``assert``) so the checks survive ``python -O``.
     """
     with trt.Refitter(self.engine, self.logger) as refitter:
         state_dict = net.state_dict()
         # Why keep a list of arrays?
         # The refitter only consumes the buffers inside refit_cuda_engine
         # (observed with TensorRT 5.1.5.0), and a numpy array may be freed
         # as soon as its ref count drops to zero, so every array handed to
         # set_weights must stay referenced until the refit call returns.
         keep_alive = []

         def set_role(layer_name, role, array):
             # Register the array with the refitter and pin it until refit.
             refitter.set_weights(layer_name, role, array)
             keep_alive.append(array)

         for layer_name, desc in self.refit_weight_dict.items():
             layer_type = desc["type"]
             if layer_type in ("Linear", "Convolution"):
                 kernel = state_dict[desc["weight"]].detach()
                 if layer_type == "Convolution":
                     # Only the convolution kernel is cast to float32.
                     # NOTE(review): Linear kernels and all biases keep
                     # their stored dtype - confirm this asymmetry is
                     # intentional.
                     kernel = kernel.float()
                 set_role(layer_name, trt.WeightsRole.KERNEL,
                          kernel.cpu().numpy())
                 if "bias" in desc:
                     bias = state_dict[desc["bias"]].detach().cpu().numpy()
                     set_role(layer_name, trt.WeightsRole.BIAS, bias)
             elif layer_type == "BatchNorm":
                 running_var = state_dict[desc["running_var"]]
                 running_mean = state_dict[desc["running_mean"]]
                 weight = state_dict[desc["weight"]]
                 bias = state_dict[desc["bias"]]
                 eps = desc["eps"]
                 running_mean = running_mean.detach().cpu().numpy()
                 running_var = running_var.detach().cpu().numpy()
                 weight = weight.detach().cpu().numpy()
                 bias = bias.detach().cpu().numpy()
                 # Fold batch-norm statistics into one scale/shift pair:
                 #   y = (x - mean) / sqrt(var + eps) * weight + bias
                 #     = x * scale + shift
                 std = np.sqrt(running_var + eps)
                 shift = (-running_mean / std) * weight + bias
                 scale = weight / std
                 set_role(layer_name, trt.WeightsRole.SCALE, scale)
                 set_role(layer_name, trt.WeightsRole.SHIFT, shift)
             else:
                 raise NotImplementedError(
                     "unsupported refit layer type: {!r}".format(layer_type))

         # Get description of missing weights. This should return empty
         # lists in this case.
         missing_layers, _missing_roles = refitter.get_missing()
         if len(missing_layers) != 0:
             raise AssertionError(
                 "Refitter found missing weights. Call set_weights() for all missing weights"
             )
         # Refit the engine with the new weights. The call is made outside
         # of any assert statement so it still executes under ``python -O``.
         if not refitter.refit_cuda_engine():
             raise AssertionError("refit_cuda_engine() reported failure")
def main():
    """Refit the BiDAF engine with fake, then real, weights and check the answer.

    The engine obtained from ``get_engine`` is refitted twice for the named
    weight ``'Parameter576_B_0'``: first with an all-ones array (the answer is
    expected to be wrong), then with the array loaded from
    ``Parameter576_B_0.npy`` (the answer is expected to be ``[b'brown']``).

    Raises:
        AssertionError: Weights are reported missing, the refit fails, or the
            predicted answer does not match the expectation for that pass.
            Raised explicitly so the checks (and the refit call itself) are
            not stripped under ``python -O``.
    """
    onnx_file_path = 'bidaf-modified.onnx'
    engine_file_path = "bidaf.trt"

    # input
    passage = 'A quick brown fox jumps over the lazy dog.'
    query = 'What color is the fox?'
    cw_str, _ = preprocess(passage)
    # get ravelled data
    cw, cc, qw, qc = get_inputs(passage, query)

    # Do inference with TensorRT
    refit_weights = np.load("Parameter576_B_0.npy")
    fake_weights = np.ones_like(refit_weights)
    engine = get_engine(onnx_file_path, engine_file_path)
    refitter = trt.Refitter(engine, TRT_LOGGER)
    # Kept under its own name so it no longer shadows the passage text.
    exec_context = engine.create_execution_context()

    for weights, answer_correct in [(fake_weights, False), (refit_weights, True)]:
        print("Refitting engine...")
        # To get a list of all refittable weights' names
        # in the network, use refitter.get_all_weights().

        # Refit named weights via set_named_weights
        refitter.set_named_weights('Parameter576_B_0', weights)
        # Get missing weights names. This should return empty
        # lists in this case.
        missing_weights = refitter.get_missing_weights()
        if len(missing_weights) != 0:
            raise AssertionError(
                "Refitter found missing weights. Call set_named_weights() or set_weights() for all missing weights")
        # Refit the engine with the new weights. The call lives outside of an
        # assert statement so it still executes under ``python -O``.
        if not refitter.refit_cuda_engine():
            raise AssertionError("refit_cuda_engine() reported failure")

        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Doing inference...")
        # Do inference
        # Set host input. The common.do_inference_v2 function will copy the input to the GPU before executing.
        inputs[0].host = cw
        inputs[1].host = cc
        inputs[2].host = qw
        inputs[3].host = qc
        trt_outputs = common.do_inference_v2(exec_context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

        # ndarray.item() is what np.asscalar wrapped; np.asscalar itself has
        # been removed from NumPy.
        start = trt_outputs[0].item()
        end = trt_outputs[1].item()
        answer = [w.encode() for w in cw_str[start:end + 1].reshape(-1)]
        if answer_correct != (answer == [b'brown']):
            raise AssertionError(
                "unexpected answer {!r} (expected correct answer: {})".format(
                    answer, answer_correct))
    print("Passed")
Example #3
0
def main():
    """Train the MNIST model, refit the engine's conv_1 layer, and verify accuracy.

    The engine comes from ``build_engine_with_some_missing_weights``. Accuracy
    is measured once before the refit, then the trained ``conv1`` kernel and
    bias are supplied through a ``trt.Refitter`` and accuracy is measured
    again and compared with the model's latest test-set accuracy.

    Raises:
        AssertionError: Weights are reported missing, the refit fails, or the
            post-refit accuracy is below the expected value. Raised explicitly
            so neither the refit nor the final measurement is stripped under
            ``python -O``.
    """
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine_with_some_missing_weights(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream,
                              mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated weightRoles
            # in the network, use refitter.get_all()
            # Set the actual weights for the conv_1 layer. Since it consists of
            # kernel weights and bias weights, set each of them by specifying
            # the WeightsRole.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())
            # Get description of missing weights. This should return empty
            # lists in this case.
            missing_layers, _missing_roles = refitter.get_missing()
            if len(missing_layers) != 0:
                raise AssertionError(
                    "Refitter found missing weights. Call set_weights() for all missing weights"
                )
            # Refit the engine with the new weights. The call is kept out of
            # an assert statement so it still runs under ``python -O``.
            if not refitter.refit_cuda_engine():
                raise AssertionError("refit_cuda_engine() reported failure")

        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy(
        )
        print(
            "Accuracy After Engine Refit (expecting {:.1f}% correct predictions)"
            .format(100 * expected_correct_predictions))
        # Measure first, then check, so the inference pass is never optimised
        # away together with an assert.
        refit_accuracy = get_trt_test_accuracy(engine, inputs, outputs,
                                               bindings, stream, mnist_model)
        if not (refit_accuracy >= expected_correct_predictions):
            raise AssertionError(
                "accuracy after refit ({}) is below the expected {}".format(
                    refit_accuracy, expected_correct_predictions))
Example #4
0
def run():
    """Build or refit the TensorRT engine stored at ``trtFile``, then run one inference.

    When ``trtFile`` does not exist, ``onnxFile0`` is parsed, a refittable
    engine is built from it and written to ``trtFile``. When it does exist,
    the engine is loaded, ``onnxFile1`` is parsed, and the weights of its
    convolution, fully-connected and constant layers are pushed into the
    loaded engine through a ``trt.Refitter``. In both cases ``inputImage`` is
    then fed through the engine and the output is printed.

    Relies on the module-level names ``trtFile``, ``onnxFile0``,
    ``onnxFile1`` and ``inputImage``. Terminates the process via ``exit()``
    on load, parse or build failure, and returns early when the refit fails.
    """
    logger = trt.Logger(trt.Logger.ERROR)
    # Decide the mode once so the load step and the refit/build step below
    # cannot disagree if the file appears or disappears in between.
    refit_mode = os.path.isfile(trtFile)
    if refit_mode:
        with open(trtFile, 'rb') as f:
            engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
        if engine is None:
            print("Failed loading engine!")
            exit()
        print("Succeeded loading engine!")
        onnxFile = onnxFile1  # a plan already exists: parse the second model to refit from it
    else:
        onnxFile = onnxFile0  # no plan yet: build it from the first model

    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.flags = 1 << int(trt.BuilderFlag.REFIT)
    config.max_workspace_size = 3 << 30
    parser = trt.OnnxParser(network, logger)
    if not os.path.exists(onnxFile):
        print("Failed finding .onnx file!")
        exit()
    print("Succeeded finding .onnx file!")
    with open(onnxFile, 'rb') as model:
        if not parser.parse(model.read()):
            print("Failed parsing .onnx file!")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            exit()
        print("Succeeded parsing .onnx file!")

    if refit_mode:  # refit the loaded engine
        refitter = trt.Refitter(engine, logger)
        layerNameList, weightRoleList = refitter.get_all()
        for name, role in zip(layerNameList, weightRoleList):
            print("LayerName:%s,WeightRolw:%s" % (name, role))
        refittable_names = set(layerNameList)

        for i in range(network.num_layers):
            layer = network.get_layer(i)
            if layer.name not in refittable_names:
                continue

            # Depending on the actual network, more layer types may have to
            # be handled here.
            layer_type = layer.type
            if layer_type == trt.LayerType.CONVOLUTION:
                # Re-tag the generic layer object as its concrete class so
                # the kernel/bias attributes become accessible.
                layer.__class__ = trt.IConvolutionLayer
                refitter.set_weights(layer.name, trt.WeightsRole.KERNEL, layer.kernel)
                refitter.set_weights(layer.name, trt.WeightsRole.BIAS, layer.bias)
            elif layer_type == trt.LayerType.FULLY_CONNECTED:
                layer.__class__ = trt.IFullyConnectedLayer
                refitter.set_weights(layer.name, trt.WeightsRole.KERNEL, layer.kernel)
            elif layer_type == trt.LayerType.CONSTANT:
                layer.__class__ = trt.IConstantLayer
                refitter.set_weights(layer.name, trt.WeightsRole.CONSTANT, layer.weights)

        if not refitter.refit_cuda_engine():
            print("Failed refitting engine, missing weight:")
            missing_names, missing_roles = refitter.get_missing()
            # Report the layers that are actually missing, not the last name
            # left over from the listing loop above.
            for missing_name, missing_role in zip(missing_names, missing_roles):
                print("\tLayerName:%s,WeightRolw:%s" % (missing_name, missing_role))
            return
        print("Succeeded refitting engine!")

    else:  # build the plan file
        inputTensor = network.get_input(0)
        inputTensor.shape = [1, 1, 28, 28]
        # (A per-layer dump of the parsed network can be added here when
        # debugging the import.)
        engineString = builder.build_serialized_network(network, config)
        if engineString is None:
            print("Failed building engine!")
            exit()
        print("Succeeded building engine!")
        with open(trtFile, 'wb') as f:
            f.write(engineString)
        engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)

    context = engine.create_execution_context()
    context.set_binding_shape(0, [1, 1, 28, 28])
    _, stream = cudart.cudaStreamCreate()
    print("Binding0->", engine.get_binding_shape(0), context.get_binding_shape(0), engine.get_binding_dtype(0))
    print("Binding1->", engine.get_binding_shape(1), context.get_binding_shape(1), engine.get_binding_dtype(1))

    data = cv2.imread(inputImage, cv2.IMREAD_GRAYSCALE).astype(np.float32)
    inputH0 = np.ascontiguousarray(data.reshape(-1))
    outputH0 = np.empty(context.get_binding_shape(1), dtype=trt.nptype(engine.get_binding_dtype(1)))
    _, inputD0 = cudart.cudaMallocAsync(inputH0.nbytes, stream)
    _, outputD0 = cudart.cudaMallocAsync(outputH0.nbytes, stream)

    # Host -> device copy, inference and device -> host copy are all queued
    # on the same stream and awaited once with cudaStreamSynchronize.
    cudart.cudaMemcpyAsync(inputD0, inputH0.ctypes.data, inputH0.nbytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
    context.execute_async_v2([int(inputD0), int(outputD0)], stream)
    cudart.cudaMemcpyAsync(outputH0.ctypes.data, outputD0, outputH0.nbytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
    cudart.cudaStreamSynchronize(stream)

    print("inputH0 :", data.shape)
    #print(data)
    print("outputH0:", outputH0.shape)
    print(outputH0)

    cudart.cudaStreamDestroy(stream)
    cudart.cudaFree(inputD0)
    cudart.cudaFree(outputD0)
    print("Succeeded running model in TensorRT!")
def run(nRunTime):
    """Load or build a one-convolution refittable engine, optionally refit it, then run it.

    If ``trtFile`` exists the engine is deserialized from it; otherwise a
    network with a single convolution layer (zero-filled placeholder weights
    named ``"conv-w"`` / ``"conv-b"``) is built with the REFIT flag and saved
    to ``trtFile``. The module-level ``data`` array is then run through the
    engine and both input and output are printed.

    Relies on the module-level names ``trtFile``, ``nIn``, ``cIn``, ``hIn``,
    ``wIn``, ``cOut``, ``hW``, ``wW``, ``weight``, ``bias`` and ``data``.

    Args:
        nRunTime: ``0`` skips the refit; any other value refits the named
            weights with ``weight`` and ``bias`` before inference.

    Returns:
        None. Returns early (after printing a message) when the engine cannot
        be loaded or built, or when the refit fails.
    """
    logger = trt.Logger(trt.Logger.ERROR)
    if os.path.isfile(trtFile):
        with open(trtFile, 'rb') as f:
            engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
        if engine is None:
            print("Failed loading engine!")
            return
        print("Succeeded loading engine!")
    else:
        builder = trt.Builder(logger)
        network = builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        config = builder.create_builder_config()
        config.flags = 1 << int(trt.BuilderFlag.REFIT)

        inputT0 = network.add_input('inputT0', trt.float32,
                                    (nIn, cIn, hIn, wIn))
        # Placeholder weights; the kernel must hold cOut * cIn * hW * wW
        # elements to match the (hW, wW) kernel shape passed below.
        fakeWeight = np.zeros([cOut, cIn, hW, wW], dtype=np.float32)
        fakeBias = np.zeros([cOut], dtype=np.float32)
        convolutionLayer = network.add_convolution_nd(inputT0, cOut, (hW, wW),
                                                      fakeWeight, fakeBias)
        #convolutionLayer.name = 'conv'
        # Name the weights so they can be addressed with set_named_weights.
        network.set_weights_name(convolutionLayer.kernel, "conv-w")
        network.set_weights_name(convolutionLayer.bias, "conv-b")

        network.mark_output(convolutionLayer.get_output(0))
        engineString = builder.build_serialized_network(network, config)
        if engineString is None:
            print("Failed building engine!")
            return
        print("Succeeded building engine!")
        with open(trtFile, 'wb') as f:
            f.write(engineString)
        engine = trt.Runtime(logger).deserialize_cuda_engine(engineString)

    if nRunTime == 0:
        print("Do not refit!")
    else:
        print("Refit!")
        refitter = trt.Refitter(engine, logger)
        refitter.set_named_weights("conv-w", weight)
        refitter.set_named_weights("conv-b", bias)

        # List anything the refitter still considers missing before refitting.
        missing_layers, missing_roles = refitter.get_missing()
        for layer, role in zip(missing_layers, missing_roles):
            print("[", layer, "-", role, "]")

        if not refitter.refit_cuda_engine():
            print("Failed Refitting engine!")
            return

    context = engine.create_execution_context()
    _, stream = cudart.cudaStreamCreate()
    inputH0 = np.ascontiguousarray(data.reshape(-1))
    outputH0 = np.empty(context.get_binding_shape(1),
                        dtype=trt.nptype(engine.get_binding_dtype(1)))
    _, inputD0 = cudart.cudaMallocAsync(inputH0.nbytes, stream)
    _, outputD0 = cudart.cudaMallocAsync(outputH0.nbytes, stream)

    # Copy in, execute and copy out on one stream, then wait for all three.
    cudart.cudaMemcpyAsync(inputD0, inputH0.ctypes.data, inputH0.nbytes,
                           cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
                           stream)
    context.execute_async_v2([int(inputD0), int(outputD0)], stream)
    cudart.cudaMemcpyAsync(outputH0.ctypes.data, outputD0, outputH0.nbytes,
                           cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
                           stream)
    cudart.cudaStreamSynchronize(stream)

    print("data:", data.shape)
    print(data)
    print("outputH0:", outputH0.shape)
    print(outputH0)

    cudart.cudaStreamDestroy(stream)
    cudart.cudaFree(inputD0)
    cudart.cudaFree(outputD0)
Example #6
0
def main():
    """Train the MNIST model, refit conv_1 in the TensorRT engine, and check accuracy.

    Accuracy of the engine returned by
    ``build_engine_with_some_missing_weights`` is measured before the refit;
    the trained ``conv1`` kernel and bias are then set through a
    ``trt.Refitter`` and the accuracy is measured again and compared against
    the model's latest test-set accuracy.

    Raises:
        AssertionError: Weights are reported missing, the refit fails, or the
            post-refit accuracy is below the expected value. Raised explicitly
            so the refit call and the final measurement still execute under
            ``python -O``.
    """
    # add_help lives in common.py; it is effectively a command-line argument
    # parser.
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model: create it, train it, then extract its weights.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine_with_some_missing_weights is implemented in this file.
    with build_engine_with_some_missing_weights(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # allocate_buffers (see common.py) returns the input and output
        # buffer lists, the bindings list and the stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        # Run inference and compute the accuracy.
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream,
                              mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        # Refitter updates the weights inside an engine, see
        # https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Refitter.html?highlight=refitter#tensorrt.Refitter
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated weightRoles
            # in the network, use refitter.get_all()
            # Set the actual weights for the conv_1 layer. Since it consists of
            # kernel weights and bias weights, set each of them by specifying
            # the WeightsRole.
            # set_weights assigns new weights to the named layer, see
            # https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Refitter.html?highlight=set_weights#tensorrt.Refitter.set_weights
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())
            # Get description of missing weights. This should return empty
            # lists in this case.
            missing_layers, _missing_roles = refitter.get_missing()
            # Fail if any layer still lacks weights.
            if len(missing_layers) != 0:
                raise AssertionError(
                    "Refitter found missing weights. Call set_weights() for all missing weights"
                )
            # Refit the engine with the new weights; refit_cuda_engine
            # returns True on success. The call is kept out of an assert
            # statement so it still runs under ``python -O``.
            if not refitter.refit_cuda_engine():
                raise AssertionError("refit_cuda_engine() reported failure")
        # get_latest_test_set_accuracy (see model.py) returns the accuracy
        # obtained by the most recent training run.
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy(
        )
        print(
            "Accuracy After Engine Refit (expecting {:.1f}% correct predictions)"
            .format(100 * expected_correct_predictions))
        # Measure the TensorRT accuracy first, then compare, so the inference
        # pass is never dropped together with an assert.
        refit_accuracy = get_trt_test_accuracy(engine, inputs, outputs,
                                               bindings, stream, mnist_model)
        if not (refit_accuracy >= expected_correct_predictions):
            raise AssertionError(
                "accuracy after refit ({}) is below the expected {}".format(
                    refit_accuracy, expected_correct_predictions))