def test_compile_inconsistent_batch_size(self):
    """Check that mismatched input batch sizes are rejected.

    The model is built with ``batch_size_1=3`` and ``batch_size_2=None``;
    compiling it is expected to raise ``ValueError`` carrying the message
    ``'Inconsistent batch size specification.'``.
    """

    def _add(input_x, input_y, _):
        return tf.add(input_x, input_y, name='z')

    mismatched_model = _make_onnx_model(func=_add, batch_size_1=3, batch_size_2=None)

    with self.assertRaises(ValueError) as raised:
        compiler.compile_source(source=mismatched_model, config=Config(max_batch_size=4))

    expected_args = ('Inconsistent batch size specification.',)
    self.assertEqual(raised.exception.args, expected_args)
def test_compile_simple(self):
    """Compile an element-wise add model and check the reported metadata.

    The model is compiled twice: once with both batch sizes set to ``3`` and
    once with both set to ``None``. Each time the compiled inputs, input data
    formats, outputs and the engine type are verified.
    """
    # NOTE(review): a second ``test_compile_simple`` appears right after this
    # one in the visible source; if both belong to the same class, the later
    # definition replaces this one and this test never runs -- confirm.

    def _add(input_x, input_y, _):
        return tf. add(input_x, input_y, name='z')

    for shared_batch_size in (3, None):
        model = _make_onnx_model(func=_add,
                                 batch_size_1=shared_batch_size,
                                 batch_size_2=shared_batch_size)
        result = compiler.compile_source(source=model, config=Config(max_batch_size=4))

        actual_inputs = result.get_inputs()
        self.assertEqual(
            actual_inputs,
            [
                ModelInput(name=input_name, data_type=TfDataType.DT_FLOAT, format=None, dims=[4])
                for input_name in ('x:0', 'y:0')
            ],
        )

        self.assertEqual(result.input_data_formats, [None, None])

        actual_outputs = result.get_outputs()
        self.assertEqual(
            actual_outputs,
            [ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])],
        )

        self.assertIsInstance(result.cuda_engine, ICudaEngine)
def test_compile_simple(self):
    """Compile an element-wise add model and check inputs, outputs and engine type."""

    def _add(input_x, input_y, _):
        return tf.add(input_x, input_y, name='z')

    model = _make_onnx_model(_add)
    result = compiler.compile_source(source=model, config=Config(max_batch_size=4))

    # Both inputs are expected to be reported without a data format.
    expected_inputs = [Input(name=input_name, data_format=None) for input_name in ('x:0', 'y:0')]

    self.assertEqual(result.inputs, expected_inputs)
    self.assertEqual(result.outputs, ['z:0'])
    self.assertIsInstance(result.cuda_engine, ICudaEngine)
def test_compile_fp16(self):
    """Compile a weighted-add model with FP16 and strict types enabled.

    The model multiplies a variable ``w`` by the sum of the two inputs. It is
    compiled with both batch sizes set to ``3`` and then to ``None``; each
    time the compiled inputs, input data formats, outputs and the engine type
    are verified.
    """

    def _build_model(input_x, input_y, session):
        scale = tf.Variable(initial_value=0.0, dtype=tf.float32, name='w')
        # The variable is initialized in the provided session before use.
        session.run(scale.initializer)
        return tf.multiply(scale, input_x + input_y, name='z')

    for shared_batch_size in (3, None):
        model = _make_onnx_model(func=_build_model,
                                 batch_size_1=shared_batch_size,
                                 batch_size_2=shared_batch_size)
        fp16_config = Config(max_batch_size=4, enable_fp16=True, enable_strict_types=True)
        result = compiler.compile_source(source=model, config=fp16_config)

        actual_inputs = result.get_inputs()
        self.assertEqual(
            actual_inputs,
            [
                ModelInput(name=input_name, data_type=TfDataType.DT_FLOAT, format=None, dims=[4])
                for input_name in ('x:0', 'y:0')
            ],
        )

        self.assertEqual(result.input_data_formats, [None, None])

        actual_outputs = result.get_outputs()
        self.assertEqual(
            actual_outputs,
            [ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])],
        )

        self.assertIsInstance(result.cuda_engine, ICudaEngine)
def test_compile_int8(self):
    """Compile a weighted-add model with INT8, FP16 and strict types enabled.

    A local calibrator backed by two ``mini_cuda`` buffers is passed as
    ``int8_calibrator``. The model is compiled with both batch sizes set to
    ``3`` and then to ``None``; each time the compiled inputs, input data
    formats, outputs and the engine type are verified.
    """
    from .. import mini_cuda  # pylint: disable=import-outside-toplevel

    def _build_model(input_x, input_y, session):
        scale = tf.Variable(initial_value=0.0, dtype=tf.float32, name='w')
        session.run(scale.initializer)
        return tf.multiply(scale, input_x + input_y, name='z')

    class _MyCalibrator(IInt8EntropyCalibrator2):
        def __init__(self):
            super().__init__()

            # One buffer per model input.
            self._buffers = [mini_cuda.allocate_memory(4 * 4 * 3) for _ in range(2)]
            self._cache = None
            self._index = 0

        def close(self):
            for allocated in self._buffers:
                allocated.close()

        def get_batch(self, names, p_str=None):
            del names, p_str

            # Hand out the same buffers 16 times, then signal the end with None.
            if self._index == 16:
                return None

            self._index += 1
            return [int(allocated) for allocated in self._buffers]

        def get_batch_size(self):
            return 3

        def read_calibration_cache(self):
            return self._cache

        def write_calibration_cache(self, cache):
            self._cache = cache

    mini_cuda.init()

    for shared_batch_size in (3, None):
        model = _make_onnx_model(func=_build_model,
                                 batch_size_1=shared_batch_size,
                                 batch_size_2=shared_batch_size)

        # The context and the calibrator are both closed once compilation is done.
        with contextlib.closing(mini_cuda.get_device(0).create_context(0)):
            with contextlib.closing(_MyCalibrator()) as calibrator:
                result = compiler.compile_source(
                    source=model,
                    config=Config(max_batch_size=4,
                                  int8_calibrator=calibrator,
                                  enable_int8=True,
                                  enable_fp16=True,
                                  enable_strict_types=True),
                )

        actual_inputs = result.get_inputs()
        self.assertEqual(
            actual_inputs,
            [
                ModelInput(name=input_name, data_type=TfDataType.DT_FLOAT, format=None, dims=[4])
                for input_name in ('x:0', 'y:0')
            ],
        )

        self.assertEqual(result.input_data_formats, [None, None])

        actual_outputs = result.get_outputs()
        self.assertEqual(
            actual_outputs,
            [ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])],
        )

        self.assertIsInstance(result.cuda_engine, ICudaEngine)
def test_compile_int8_with_customize_calibrator(self):
    """Compile a ResNet-50 ONNX model to INT8 using a calibrator loaded from a file.

    A calibrator script is written to a temporary ``.py`` file and its path is
    passed through ``TensorrtConfig(calibrator=...)`` together with a
    calibration dataset. The compiled inputs, input data formats, outputs and
    the engine type are then verified.
    """
    # Source of the calibrator module. Every statement ends with an explicit
    # "\n" and carries its own indentation so the written file is valid
    # Python; the backslash in split('\\n') is escaped so that the generated
    # file contains the two characters ``\n`` rather than a raw line break
    # inside a quoted literal.
    calibrator_source = (
        "import tensorrt as trt\n"
        "import pycuda.driver as cuda\n"
        "import pycuda.autoinit\n"
        "import os\n"
        "import numpy as np\n"
        "from PIL import Image\n"
        "import torchvision.transforms as transforms\n"
        "\n\n"
        "class MyCalibrator(trt.IInt8EntropyCalibrator2):\n"
        "    def __init__(self, calibration_dataset, batch_size):\n"
        "        trt.IInt8EntropyCalibrator2.__init__(self)\n"
        "        self.cache_file = None\n"
        "        self.batch_size = batch_size\n"
        "        self.channel, self.width, self.height = 3, 224, 224\n"
        "        txt_file = open(os.path.join(calibration_dataset, '..', 'calibration_data.txt'), 'r')\n"
        "        image_lines = txt_file.readlines()\n"
        "        self.images = [os.path.join(calibration_dataset, image_line.split('\\n')[0])"
        " for image_line in image_lines]\n"
        "        self.data_shape = (self.batch_size, self.channel, self.height, self.width)\n"
        "        self.data_size = trt.volume(list(self.data_shape)) * trt.float32.itemsize\n"
        "        self.current_index = 0\n"
        "        self.device_input = cuda.mem_alloc(self.data_size)\n"
        "\n"
        "    def get_batch_size(self):\n"
        "        return self.batch_size\n"
        "\n"
        "    def get_batch(self, names, p_str=None):\n"
        "        img_transform = transforms.Compose([transforms.Resize([self.height, self.width]),"
        " transforms.ToTensor(), ])\n"
        "        batch_images = np.zeros(self.data_shape, dtype=np.float32)\n"
        "        img = Image.open(self.images[0])\n"
        "        batch_images[0] = img_transform(img).numpy()\n"
        "        if self.current_index + self.batch_size > len(batch_images):\n"
        "            return None\n"
        "        self.current_index += self.batch_size\n"
        "        cuda.memcpy_htod(self.device_input, batch_images.astype(np.float32))\n"
        "        return [int(self.device_input)]\n"
        "\n"
        "    def read_calibration_cache(self):\n"
        "        return self.cache_file\n"
        "\n"
        "    def write_calibration_cache(self, cache):\n"
        "        self.cache_file = cache\n"
    )

    with TemporaryDirectory() as file_path, NamedTemporaryFile(mode='w+', suffix='.py') as calibrator_path:
        resnet50_onnx_model_file = _make_resnet50_onnx_model_file(file_path)
        resnet50_onnx_model = onnx_compiler.compile_source(
            source=ONNXModelFile(resnet50_onnx_model_file),
            config=OnnxConfig(input_formats=[DataFormat.CHANNELS_FIRST]),
        )
        calibration_dataset_sample = _make_calibration_dataset_simple(file_path)

        calibrator_path.write(calibrator_source)
        # Rewind after writing; the file is handed to the compiler by name below.
        calibrator_path.seek(0)

        compiled = compiler.compile_source(
            source=resnet50_onnx_model,
            config=TensorrtConfig(max_batch_size=1,
                                  data_type='INT8',
                                  calibration_dataset=calibration_dataset_sample,
                                  calibrator=calibrator_path.name),
        )

        # NOTE(review): get_inputs() is compared against a ModelOutput here,
        # whereas the sibling tests compare it against ModelInput -- confirm
        # whether this should be a ModelInput with the matching format.
        self.assertEqual(compiled.get_inputs(),
                         [ModelOutput(name='input.1', data_type=TfDataType.DT_FLOAT, dims=[3, 224, 224])])
        self.assertEqual(compiled.input_data_formats, [DataFormat.CHANNELS_FIRST])
        self.assertEqual(compiled.get_outputs(),
                         [ModelOutput(name='495', data_type=TfDataType.DT_FLOAT, dims=[4])])
        self.assertIsInstance(compiled.cuda_engine, ICudaEngine)