def test_compile_inconsistent_batch_size(self):
        """Check that mismatched batch size arguments make compilation fail.

        The ONNX model is built with ``batch_size_1=3`` and ``batch_size_2=None``;
        ``compiler.compile_source`` is then expected to raise ``ValueError`` whose
        ``args`` hold exactly one fixed message.
        """
        def _add(input_x, input_y, _):
            # The third argument supplied by the model factory is not needed here.
            return tf.add(input_x, input_y, name='z')

        mismatched_model = _make_onnx_model(func=_add, batch_size_1=3, batch_size_2=None)

        with self.assertRaises(ValueError) as raised:
            compiler.compile_source(source=mismatched_model, config=Config(max_batch_size=4))

        expected_args = ('Inconsistent batch size specification.',)

        self.assertEqual(raised.exception.args, expected_args)
    def test_compile_simple(self):
        """Compile an element-wise add model and check the reported metadata.

        The model is built twice: with batch size 3 and with ``None`` passed for
        both inputs. Each case runs inside its own ``subTest`` so that a failure
        report names the batch size involved and one failing case does not
        prevent the other from running.
        """
        def _add(input_x, input_y, _):
            # The third argument supplied by the model factory is not needed here.
            return tf.add(input_x, input_y, name='z')

        for batch_size in [3, None]:
            with self.subTest(batch_size=batch_size):
                onnx_model = _make_onnx_model(func=_add,
                                              batch_size_1=batch_size,
                                              batch_size_2=batch_size)

                compiled = compiler.compile_source(source=onnx_model,
                                                   config=Config(max_batch_size=4))

                self.assertEqual(compiled.get_inputs(), [
                    ModelInput(name='x:0',
                               data_type=TfDataType.DT_FLOAT,
                               format=None,
                               dims=[4]),
                    ModelInput(name='y:0',
                               data_type=TfDataType.DT_FLOAT,
                               format=None,
                               dims=[4])
                ])

                self.assertEqual(compiled.input_data_formats, [None, None])
                self.assertEqual(compiled.get_outputs(), [
                    ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])
                ])
                self.assertIsInstance(compiled.cuda_engine, ICudaEngine)
    def test_compile_simple(self):
        """Compile an element-wise add model and check inputs, outputs and engine.

        Expects two inputs named ``x:0`` and ``y:0`` with no data format, a single
        output named ``z:0``, and a ``cuda_engine`` that is an ``ICudaEngine``.
        """
        # NOTE(review): another method named test_compile_simple appears earlier
        # in this source; if both live in the same class, this later definition
        # replaces the earlier one and only this one runs.
        def _add(input_x, input_y, _):
            return tf.add(input_x, input_y, name='z')

        onnx_model = _make_onnx_model(_add)
        engine_config = Config(max_batch_size=4)
        compiled = compiler.compile_source(source=onnx_model, config=engine_config)

        expected_inputs = [Input(name=tensor_name, data_format=None) for tensor_name in ('x:0', 'y:0')]

        self.assertEqual(compiled.inputs, expected_inputs)
        self.assertEqual(compiled.outputs, ['z:0'])
        self.assertIsInstance(compiled.cuda_engine, ICudaEngine)
    def test_compile_fp16(self):
        """Compile a weighted-sum model with FP16 and strict types enabled.

        The model is built twice: with batch size 3 and with ``None`` passed for
        both inputs. Each case runs inside its own ``subTest`` so that a failure
        report names the batch size involved and one failing case does not
        prevent the other from running.
        """
        def _build_model(input_x, input_y, session):
            weight = tf.Variable(initial_value=0.0, dtype=tf.float32, name='w')

            # Run the variable's initializer in the session supplied by the factory.
            session.run(weight.initializer)

            return tf.multiply(weight, input_x + input_y, name='z')

        for batch_size in [3, None]:
            with self.subTest(batch_size=batch_size):
                onnx_model = _make_onnx_model(func=_build_model,
                                              batch_size_1=batch_size,
                                              batch_size_2=batch_size)

                compiled = compiler.compile_source(source=onnx_model,
                                                   config=Config(max_batch_size=4,
                                                                 enable_fp16=True,
                                                                 enable_strict_types=True))

                self.assertEqual(compiled.get_inputs(),
                                 [ModelInput(name='x:0', data_type=TfDataType.DT_FLOAT, format=None, dims=[4]),
                                  ModelInput(name='y:0', data_type=TfDataType.DT_FLOAT, format=None, dims=[4])])

                self.assertEqual(compiled.input_data_formats, [None, None])
                self.assertEqual(compiled.get_outputs(),
                                 [ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])])
                self.assertIsInstance(compiled.cuda_engine, ICudaEngine)
    def test_compile_int8(self):
        """Compile a weighted-sum model with INT8, FP16 and strict types enabled.

        A small in-test calibrator supplies calibration batches. The model is
        built twice: with batch size 3 and with ``None`` passed for both inputs.
        Each case runs inside its own ``subTest`` so that a failure report names
        the batch size involved and one failing case does not prevent the other
        from running.
        """
        from .. import mini_cuda  # pylint: disable=import-outside-toplevel

        def _build_model(input_x, input_y, session):
            weight = tf.Variable(initial_value=0.0, dtype=tf.float32, name='w')

            # Run the variable's initializer in the session supplied by the factory.
            session.run(weight.initializer)

            return tf.multiply(weight, input_x + input_y, name='z')

        class _MyCalibrator(IInt8EntropyCalibrator2):
            """Calibrator that hands out the same two buffers for 16 batches."""

            def __init__(self):
                super().__init__()

                # One buffer per model input.
                # NOTE(review): 4 * 4 * 3 presumably means float32 size x 4 elements
                # x a calibration batch of 3 - confirm against allocate_memory's unit.
                self._buffers = [mini_cuda.allocate_memory(4 * 4 * 3), mini_cuda.allocate_memory(4 * 4 * 3)]
                self._cache = None
                self._index = 0

            def close(self):
                """Close every allocated buffer."""
                for buffer in self._buffers:
                    buffer.close()

            def get_batch(self, names, p_str=None):
                """Return the buffers converted with ``int``, or ``None`` after 16 calls."""
                del names, p_str

                if self._index == 16:
                    return None

                self._index += 1

                return [int(buffer) for buffer in self._buffers]

            def get_batch_size(self):
                """Return the fixed calibration batch size."""
                return 3

            def read_calibration_cache(self):
                """Return the cache stored by ``write_calibration_cache`` (initially ``None``)."""
                return self._cache

            def write_calibration_cache(self, cache):
                """Remember ``cache`` on the instance."""
                self._cache = cache

        mini_cuda.init()

        for batch_size in [3, None]:
            with self.subTest(batch_size=batch_size):
                onnx_model = _make_onnx_model(func=_build_model,
                                              batch_size_1=batch_size,
                                              batch_size_2=batch_size)

                # Both the context and the calibrator are closed as soon as
                # compilation finishes; the assertions run afterwards.
                with contextlib.closing(mini_cuda.get_device(0).create_context(0)), \
                     contextlib.closing(_MyCalibrator()) as calibrator:
                    compiled = compiler.compile_source(source=onnx_model,
                                                       config=Config(max_batch_size=4,
                                                                     int8_calibrator=calibrator,
                                                                     enable_int8=True,
                                                                     enable_fp16=True,
                                                                     enable_strict_types=True))

                self.assertEqual(compiled.get_inputs(),
                                 [ModelInput(name='x:0', data_type=TfDataType.DT_FLOAT, format=None, dims=[4]),
                                  ModelInput(name='y:0', data_type=TfDataType.DT_FLOAT, format=None, dims=[4])])

                self.assertEqual(compiled.input_data_formats, [None, None])
                self.assertEqual(compiled.get_outputs(),
                                 [ModelOutput(name='z:0', data_type=TfDataType.DT_FLOAT, dims=[4])])
                self.assertIsInstance(compiled.cuda_engine, ICudaEngine)
    # Example #6
    # 0
    def test_compile_int8_with_customize_calibrator(self):
        """Compile a ResNet-50 ONNX model to INT8 with a user-supplied calibrator file.

        A calibrator module is written to a temporary ``.py`` file and its path
        is handed to the compiler through ``TensorrtConfig(calibrator=...)``
        together with a calibration dataset. The compiled model's inputs, input
        data formats, outputs and engine type are then checked.
        """
        # Adjacent string literals are concatenated verbatim, so every statement
        # of the generated module has to end in an explicit "\n"; otherwise two
        # statements are glued onto one line and the module does not parse.
        # The newline passed to split() inside the generated code is spelled
        # "\\n" so the file contains the escape sequence, not a raw line break
        # in the middle of a quoted string.
        calibrator_source = (
            "import tensorrt as trt\n"
            "import pycuda.driver as cuda\n"
            "import pycuda.autoinit\n"
            "import os\n"
            "import numpy as np\n"
            "from PIL import Image\n"
            "import torchvision.transforms as transforms\n"
            "\n\n"
            "class MyCalibrator(trt.IInt8EntropyCalibrator2):\n"
            "    def __init__(self, calibration_dataset, batch_size):\n"
            "        trt.IInt8EntropyCalibrator2.__init__(self)\n"
            "        self.cache_file = None\n"
            "        self.batch_size = batch_size\n"
            "        self.channel, self.width, self.height = 3, 224, 224\n"
            "        txt_file = open(os.path.join(calibration_dataset, '..', 'calibration_data.txt'), 'r')\n"
            "        image_lines = txt_file.readlines()\n"
            "        self.images = [os.path.join(calibration_dataset, image_line.split('\\n')[0])\n"
            "                       for image_line in image_lines]\n"
            "        self.data_shape = (self.batch_size, self.channel, self.height, self.width)\n"
            "        self.data_size = trt.volume(list(self.data_shape)) * trt.float32.itemsize\n"
            "        self.current_index = 0\n"
            "        self.device_input = cuda.mem_alloc(self.data_size)\n"
            "\n"
            "    def get_batch_size(self):\n"
            "        return self.batch_size\n"
            "\n"
            "    def get_batch(self, names, p_str=None):\n"
            "        img_transform = transforms.Compose([transforms.Resize([self.height, self.width]),\n"
            "                                            transforms.ToTensor(), ])\n"
            "        batch_images = np.zeros(self.data_shape, dtype=np.float32)\n"
            "        img = Image.open(self.images[0])\n"
            "        batch_images[0] = img_transform(img).numpy()\n"
            "        if self.current_index + self.batch_size > len(batch_images):\n"
            "            return None\n"
            "        self.current_index += self.batch_size\n"
            "        cuda.memcpy_htod(self.device_input, batch_images.astype(np.float32))\n"
            "        return [int(self.device_input)]\n"
            "\n"
            "    def read_calibration_cache(self):\n"
            "        return self.cache_file\n"
            "\n"
            "    def write_calibration_cache(self, cache):\n"
            "        self.cache_file = cache\n"
        )

        with TemporaryDirectory() as file_path, NamedTemporaryFile(mode='w+', suffix='.py') as calibrator_path:
            resnet50_onnx_model_file = _make_resnet50_onnx_model_file(file_path)
            resnet50_onnx_model = onnx_compiler.compile_source(source=ONNXModelFile(resnet50_onnx_model_file),
                                                               config=OnnxConfig(
                                                                   input_formats=[DataFormat.CHANNELS_FIRST]))
            calibration_dataset_sample = _make_calibration_dataset_simple(file_path)

            calibrator_path.write(calibrator_source)
            # Rewind the handle after writing; the compiler is given the file's
            # path (calibrator_path.name), not this handle.
            calibrator_path.seek(0)

            # Compilation must happen while the temporary directory and the
            # calibrator file still exist, hence inside the ``with`` block.
            compiled = compiler.compile_source(source=resnet50_onnx_model,
                                               config=TensorrtConfig(max_batch_size=1, data_type='INT8',
                                                                     calibration_dataset=calibration_dataset_sample,
                                                                     calibrator=calibrator_path.name))

        # NOTE(review): the expected *input* is described with ModelOutput here,
        # whereas the sibling tests compare get_inputs() against ModelInput with
        # an explicit format - confirm which type get_inputs() returns.
        self.assertEqual(compiled.get_inputs(), [ModelOutput(name='input.1', data_type=TfDataType.DT_FLOAT,
                                                             dims=[3, 224, 224])])
        self.assertEqual(compiled.input_data_formats, [DataFormat.CHANNELS_FIRST])
        self.assertEqual(compiled.get_outputs(), [ModelOutput(name='495', data_type=TfDataType.DT_FLOAT, dims=[4])])
        self.assertIsInstance(compiled.cuda_engine, ICudaEngine)