Example No. 1
def convert_tensorrt(torch_model, ts_model, data_loader):
    # TRTorch needs a range of input shapes to build the engine,
    # so scan the data loader for the smallest and largest image sides.
    min_size = 987654321  # large sentinel value
    max_size = 0
    for d in data_loader:
        image = d[0]["image"]
        inputs = [{"image": image}]  # remove other unused keys
        images = torch_model.preprocess_image(inputs)
        _, _, h, w = images.tensor.shape
        min_val = min(h, w)
        max_val = max(h, w)
        if min_val < min_size:
            min_size = min_val
        if max_val > max_size:
            max_size = max_val
    

    # compile settings
    compile_settings = {
        "input_shapes": [
            {
                "min": [1, 3, min_size, min_size],
                "opt": [1, 3, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST],
                "max": [1, 3, max_size, max_size]
            },
        ],
        "op_precision": torch.float32 # If you want Run with FP16, 'torch.half' instead
    }

    ts_model.eval()
    trt_ts_module = trtorch.compile(ts_model, compile_settings)
    with PathManager.open(os.path.join(args.output, "trt_ts_module.ts"), "wb") as f:
        torch.jit.save(trt_ts_module, f)
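The serialized module can be reloaded later without re-compiling. A minimal sketch of loading and running the artifact saved above; the file path and the 800x800 dummy input are assumptions, and the input must lie inside the min/max range given in compile_settings:

import torch
import trtorch  # importing trtorch makes the TensorRT execution ops available to TorchScript

trt_model = torch.jit.load("trt_ts_module.ts")
dummy_image = torch.randn(1, 3, 800, 800, device="cuda")  # assumed size, within [min_size, max_size]
with torch.no_grad():
    output = trt_model(dummy_image)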
Example No. 2
    def test_compile_script(self):

        fp32_test_acc = self.compute_accuracy(self.testing_dataloader,
                                              self.model)
        log(Level.Info,
            "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc))

        compile_spec = {
            "input_shapes": [[1, 3, 32, 32]],
            "op_precision": torch.int8,
            "calibrator": self.calibrator,
            "device": {
                "device_type": trtorch.DeviceType.GPU,
                "gpu_id": 0,
                "dla_core": 0,
                "allow_gpu_fallback": False,
            }
        }

        trt_mod = trtorch.compile(self.model, compile_spec)
        int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod)
        log(Level.Info,
            "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc))
        acc_diff = fp32_test_acc - int8_test_acc
        self.assertTrue(abs(acc_diff) < 3)
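The accuracy figures come from a compute_accuracy helper that is not shown in this snippet. A plausible sketch of such a method, assuming a labelled (image, label) test dataloader and a CUDA model; only the name and call signature come from the test, the body is an assumption:

    def compute_accuracy(self, dataloader, model):
        # Hypothetical helper: top-1 accuracy over a labelled dataloader.
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in dataloader:
                images, labels = images.cuda(), labels.cuda()
                preds = model(images).argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        return correct / total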
Example No. 3
    def test_compile_traced(self):
        extra_info = {
            "input_shapes": [self.input.shape],
        }

        trt_mod = trtorch.compile(self.traced_model, extra_info)
        same = (trt_mod(self.input) -
                self.traced_model(self.input)).abs().max()
        self.assertTrue(same < 2e-3)
Example No. 4
def main():

    model = ConvGelu().eval().cuda()
    scripted_model = torch.jit.script(model)

    compile_settings = {
        "input_shapes": [[1, 3, 5, 5]],
        "op_precision": torch.float32
    }

    trt_ts_module = trtorch.compile(scripted_model, compile_settings)
    torch.jit.save(trt_ts_module, 'conv_gelu.jit')

    norm_model = Norm().eval().cuda()
    norm_ts_module = torch.jit.script(norm_model)
    norm_trt_ts = trtorch.compile(norm_ts_module, compile_settings)
    torch.jit.save(norm_trt_ts, 'norm.jit')
    print("Generated Torchscript-TRT models.")
Example No. 5
    def test_compile_script(self):
        compile_spec = {
            "input_shapes": [self.input.shape],
        }

        trt_mod = trtorch.compile(self.scripted_model, compile_spec)
        same = (trt_mod(self.input) -
                self.scripted_model(self.input)).abs().max()
        self.assertTrue(same < 2e-3)
Example No. 6
    def test_compile_script(self):
        compile_spec = {
            "input_shapes": [self.input.shape],
            "device": {
                "device_type": trtorch.DeviceType.GPU,
                "gpu_id": 0,
                "dla_core": 0,
                "allow_gpu_fallback": False,
                "disable_tf32": False
            }
        }

        trt_mod = trtorch.compile(self.scripted_model, compile_spec)
        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
        self.assertTrue(same < 2e-3)
Example No. 7
    def test_compile_script(self):
        trtorch.set_device(0)
        compile_spec = {
            "input_shapes": [self.input.shape],
            "device": {
                "device_type": trtorch.DeviceType.GPU,
                "gpu_id": self.target_gpu,
                "dla_core": 0,
                "allow_gpu_fallback": False,
                "disable_tf32": False
            }
        }

        trt_mod = trtorch.compile(self.scripted_model, compile_spec)
        # Changing the device ID deliberately. It should still run on correct device ID by context switching
        trtorch.set_device(1)
        same = (trt_mod(self.input) -
                self.scripted_model(self.input)).abs().max()
        self.assertTrue(same < 2e-3)
Example No. 8
    def test_compile_script(self):
        compile_spec = {
            "input_shapes": [self.input.shape],
            "device": {
                "device_type": trtorch.DeviceType.GPU,
                "gpu_id": 0,
                "dla_core": 0,
                "allow_gpu_fallback": False,
                "disable_tf32": False
            },
            "torch_fallback": {
                "enabled": True,
                "forced_fallback_ops": ["aten::max_pool2d"],
                "min_block_size": 1
            }
        }

        trt_mod = trtorch.compile(self.scripted_model, compile_spec)
        same = (trt_mod(self.input) -
                self.scripted_model(self.input)).abs().max()
        self.assertTrue(same < 2e-3)
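forced_fallback_ops keeps aten::max_pool2d running in plain TorchScript while the rest of the graph is converted to TensorRT, so the scripted_model in this test presumably contains a max-pool layer. A purely illustrative module of that shape (not the actual test fixture):

import torch
import torch.nn as nn

class ConvPool(nn.Module):
    # Hypothetical model containing aten::max_pool2d, giving the forced
    # fallback above an op to leave in TorchScript.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        return self.pool(torch.relu(self.conv(x)))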
Example No. 9
def main():
    model = Elu().eval()  #.cuda()

    scripted_model = torch.jit.script(model)
    compile_settings = {
        "input_shapes": [{
            "min": [1024, 1, 32, 32],
            "opt": [1024, 1, 33, 33],
            "max": [1024, 1, 34, 34],
        }],
        "op_precision":
        torch.half  # Run with FP16
    }
    trt_ts_module = trtorch.compile(scripted_model, compile_settings)
    input_data = torch.randn((1024, 1, 32, 32))
    input_data = input_data.half().to("cuda")
    pytorch_out = model.forward(input_data)

    trtorch_out = trt_ts_module(input_data)
    print('PyTorch output: \n', pytorch_out[0, :, :, 0])
    print('TRTorch output: \n', trtorch_out[0, :, :, 0])
    cal_max_diff(pytorch_out, trtorch_out)
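This script references an Elu module and a cal_max_diff helper that are not defined in the snippet. Minimal sketches under the obvious reading of the names; both are assumptions:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Elu(nn.Module):
    # Hypothetical definition: a parameter-free ELU, which would explain
    # why the .cuda() call on the model above can stay commented out.
    def forward(self, x):
        return F.elu(x)

def cal_max_diff(pytorch_out, trtorch_out):
    # Hypothetical helper: report the largest absolute difference between
    # the PyTorch and TRTorch outputs.
    max_diff = (pytorch_out - trtorch_out).abs().max().item()
    print('Max abs diff between PyTorch and TRTorch outputs:', max_diff)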
Example No. 10
def simple_trtorch_example(device='cuda'):
	input_shape = 128, 3, 224, 224  # (batch size, channel, height, width).

	preprocess = resnet50_preprocess()
	fig, axes = plt.subplots(nrows=2, ncols=2)
	for i in range(4):
		img_path = './data/img{}.JPG'.format(i)
		img = Image.open(img_path)
		input_tensor = preprocess(img)      
		plt.subplot(2, 2, i + 1)
		plt.imshow(img)
		plt.axis('off')

	with open('./data/imagenet_class_index.json') as fd: 
		d = json.load(fd)

	print('Number of classes in ImageNet: {}'.format(len(d)))

	#--------------------
	resnet50_model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
	resnet50_model.eval()

	# Decode the results into ([predicted class, description], probability).
	def predict(img_path, model):
		img = Image.open(img_path)
		#preprocess = resnet50_preprocess()
		input_tensor = preprocess(img)
		input_batch = input_tensor.unsqueeze(0)  # Create a mini-batch as expected by the model.

		# Move the input and model to GPU for speed if available.
		if torch.cuda.is_available():
			input_batch = input_batch.to(device)
			model.to(device)

		with torch.no_grad():
			output = model(input_batch)
			# Tensor of shape 1000, with confidence scores over ImageNet's 1000 classes.
			sm_output = torch.nn.functional.softmax(output[0], dim=0)

		ind = torch.argmax(sm_output)
		return d[str(ind.item())], sm_output[ind]  # ([predicted class, description], probability).

	for i in range(4):
		img_path = './data/img{}.JPG'.format(i)
		img = Image.open(img_path)

		pred, prob = predict(img_path, resnet50_model)
		print('{}: Predicted = {}, Probability = {}.'.format(img_path, pred, prob))

		plt.subplot(2, 2, i + 1)
		plt.imshow(img)
		plt.axis('off')
		plt.title(pred[1])

	#--------------------
	# Model benchmark without TRTorch/TensorRT

	model = resnet50_model.eval().to(device)

	cuda_cnn_benchmark(model, input_shape, nruns=1000, device=device)

	#--------------------
	# Create TorchScript modules.

	# Tracing.
	model = resnet50_model.eval().to(device)
	print('Start creating a traced model...')
	start_time = time.time()
	traced_model = torch.jit.trace(model, [torch.randn(input_shape, device=device)])
	print('End creating a traced model: {} secs.'.format(time.time() - start_time))

	#torch.jit.save(traced_model, './resnet50_traced.jit.pt')

	cuda_cnn_benchmark(traced_model, input_shape, nruns=1000, device=device)

	# Compile with TRTorch.
	import trtorch

	# FP32 (single precision).
	# The compiled module will have precision as specified by "op_precision".
	print('Start creating a TRTorch model (FP32)...')
	start_time = time.time()
	trt_ts_model_fp32 = trtorch.compile(traced_model, {
		'input_shapes': [input_shape],
		'op_precision': torch.float32,  # Run with FP32.
		'workspace_size': 1 << 20
	})
	print('End creating a TRTorch model (FP32): {} secs.'.format(time.time() - start_time))

	#torch.jit.save(trt_ts_model_fp32, './resnet50_fp32.ts')

	cuda_cnn_benchmark(trt_ts_model_fp32, input_shape, nruns=1000, device=device)

	# FP16 (half precision).
	# The compiled module will have precision as specified by "op_precision".
	print('Start creating a TRTorch model (FP16)...')
	start_time = time.time()
	trt_ts_model_fp16 = trtorch.compile(traced_model, {
		'input_shapes': [input_shape],
		'op_precision': torch.half,  # Run with FP16.
		'workspace_size': 1 << 20
	})
	print('End creating a TRTorch model (FP16): {} secs.'.format(time.time() - start_time))

	#torch.jit.save(trt_ts_model_fp16, './resnet50_fp16.ts')

	cuda_cnn_benchmark(trt_ts_model_fp16, input_shape, is_fp16=True, nruns=1000, device=device)
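The benchmark relies on two helpers defined elsewhere: resnet50_preprocess and cuda_cnn_benchmark. Plausible sketches of both, using the standard torchvision ImageNet preprocessing and a synchronized timing loop; the warm-up count and reporting format are assumptions:

import time

import torch
from torchvision import transforms

def resnet50_preprocess():
	# Standard ImageNet preprocessing for torchvision ResNet models.
	return transforms.Compose([
		transforms.Resize(256),
		transforms.CenterCrop(224),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
	])

def cuda_cnn_benchmark(model, input_shape, is_fp16=False, nruns=1000, device='cuda'):
	# Hypothetical benchmark helper: average latency over nruns forward
	# passes, synchronizing CUDA so the timings are meaningful.
	input_data = torch.randn(input_shape, device=device)
	if is_fp16:
		input_data = input_data.half()
	with torch.no_grad():
		for _ in range(10):  # warm-up; the iteration count is an assumption
			model(input_data)
		torch.cuda.synchronize()
		timings = []
		for _ in range(nruns):
			start = time.time()
			model(input_data)
			torch.cuda.synchronize()
			timings.append(time.time() - start)
	print('Average batch time: {:.2f} ms.'.format(sum(timings) / len(timings) * 1000))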
Example No. 11
# Not all operations are supported by TRTorch yet.
if trtorch is not None:
    with torch.no_grad():
        x = cv2_frame_to_cuda(cam.read())

    print(x.shape)
    shape = list(x.shape)
    compile_settings = {
        "input_shapes": [shape, shape],
        # "input_shapes": [
        #     # [shape, shape]
        #     # {
        #     #     "min": [1, 3, 224, 224],
        #     #     "opt": [1, 3, 512, 512],
        #     #     "max": [1, 3, 1024, 1024]
        #     # }, # For static size [1, 3, 224, 224]
        # ],
        "op_precision": torch.half, # Run with FP16
        "num_min_timing_iters": 2, # Default: 2
        "num_avg_timing_iters": 1, # Default: 1
        "max_batch_size": 1, # Maximum batch size (must be >= 1 to be set, 0 means not set)
    }

    # script_model = torch.jit.script(model)
    traced_model = torch.jit.trace(model, [x, x])
    trt_ts_module = trtorch.compile(traced_model, compile_settings)

    x = x.half()
    result = trt_ts_module(x, x)
    torch.jit.save(trt_ts_module, "trt_torchscript_module_fp16.ts")
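cv2_frame_to_cuda is assumed to turn an OpenCV BGR frame into a CUDA tensor of shape [1, 3, H, W]. A plausible sketch; the scaling to [0, 1] and the channel-order handling are assumptions:

import cv2
import torch

def cv2_frame_to_cuda(frame):
    # Hypothetical helper: BGR uint8 HxWx3 frame -> 1x3xHxW float CUDA tensor in [0, 1].
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    tensor = torch.from_numpy(rgb).permute(2, 0, 1).float().div(255.0)
    return tensor.unsqueeze(0).cuda()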