Example #1
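All snippets on this page are excerpts from the Polygraphy code base and test suite, so their imports are omitted. A sketch of what they assume is below; exact module paths can differ between Polygraphy versions, and test-only helpers such as ONNX_MODELS and time_func come from the test harness rather than the library:

import numpy as np
import pytest
import tensorrt as trt
from collections import OrderedDict

from polygraphy import cuda, util
from polygraphy.backend.trt import (CreateConfig, EngineFromNetwork,
                                    NetworkFromOnnxBytes, Profile, TrtRunner,
                                    engine_from_network, network_from_onnx_bytes)
from polygraphy.backend.trt import util as trt_util
from polygraphy.exception import PolygraphyException
from polygraphy.logger import G_LOGGER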
def generate_multidata(num_batches):
    for _ in range(num_batches):
        yield {
            # Host memory: a plain NumPy array.
            "x0": np.zeros((4, 5), dtype=np.float32),
            # Device memory: a Polygraphy DeviceArray.
            "x1": cuda.DeviceArray(dtype=np.float32).copy_from(np.ones((4, 5), dtype=np.float32)),
            # A raw GPU pointer, taken from a freshly filled DeviceArray.
            "x2": cuda.DeviceArray(dtype=np.float32).copy_from(np.ones((4, 5), dtype=np.float32) * 2).ptr,
        }
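A generator like this yields feed_dicts mixing a NumPy array, a DeviceArray, and a raw GPU pointer, which is exactly the set of input types the calibrator in Example #9 accepts. A minimal usage sketch, assuming the imports above:

from polygraphy.backend.trt import Calibrator

# Sketch: use the generator as the data loader for INT8 calibration.
calibrator = Calibrator(data_loader=generate_multidata(num_batches=2))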
Example #2
def allocate_buffers(engine):
    input_buffers = OrderedDict()
    output_buffers = OrderedDict()
    stream = cuda.Stream()
    G_LOGGER.verbose("Using batch size: {:} during buffer allocation".format(engine.max_batch_size))
    for binding in engine:
        # Prepend the batch dimension (implicit-batch engines omit it from binding shapes).
        shape = (engine.max_batch_size,) + tuple(engine.get_binding_shape(binding))
        dtype = engine.get_binding_dtype(binding)

        device_mem = cuda.DeviceArray(shape=shape, dtype=trt.nptype(dtype))
        G_LOGGER.extra_verbose("Tensor: {:35} | Allocated: {:}".format(binding, device_mem))

        if engine.binding_is_input(binding):
            # Inputs are copied in from the caller, so no host buffer is needed.
            input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(None, device_mem)
        else:
            # Outputs get a host-side staging buffer to copy results back into.
            host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
            output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(host_mem, device_mem)
    return input_buffers, output_buffers, stream
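A hedged sketch of how the returned buffers might be used around legacy (implicit-batch) inference, assuming HostDeviceMem is a simple (host, device) pair and that feed_dict (hypothetical) maps input names to NumPy arrays:

input_buffers, output_buffers, stream = allocate_buffers(engine)

# Copy inputs host -> device, enqueue inference, then copy outputs device -> host.
for name, buf in input_buffers.items():
    buf.device.copy_from(feed_dict[name], stream=stream)
bindings = [buf.device.ptr for buf in list(input_buffers.values()) + list(output_buffers.values())]
context.execute_async(batch_size=engine.max_batch_size, bindings=bindings, stream_handle=stream.ptr)
for name, buf in output_buffers.items():
    buf.device.copy_to(buf.host, stream=stream)
stream.synchronize()

Note that this sketch assumes inputs precede outputs in the engine's binding order, which is not guaranteed in general.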
Example #3
def test_cannot_use_device_view_shape_tensor(self):
    model = ONNX_MODELS["empty_tensor_expand"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner, \
            cuda.DeviceArray(shape=(5,), dtype=np.int32) as arr:
        # Shape tensors are read on the host, so passing device memory must be rejected.
        with pytest.raises(PolygraphyException, match="it must reside in host memory"):
            runner.infer({
                "data": np.ones((2, 0, 3, 0), dtype=np.float32),
                "new_shape": arr,
            })
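The correct usage implied by the test: shape tensors must be passed in host memory, i.e. as NumPy arrays. A sketch (the shape values here are hypothetical):

outputs = runner.infer({
    "data": np.ones((2, 0, 3, 0), dtype=np.float32),
    "new_shape": np.array([2, 0, 3, 0, 5], dtype=np.int32),  # host memory, not a DeviceArray
})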
Example #4
def test_subsequent_infers_with_different_input_types(self):
    model = ONNX_MODELS["identity"]
    network_loader = NetworkFromOnnxBytes(model.loader)
    with TrtRunner(EngineFromNetwork(network_loader)) as runner:
        inp = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        def check(outputs):
            assert np.all(outputs["y"] == inp)

        # The runner should accept host (NumPy) and device (DeviceArray) inputs
        # interchangeably across successive infer() calls.
        check(runner.infer({"x": inp}))
        check(runner.infer({"x": cuda.DeviceArray().copy_from(inp)}))
        check(runner.infer({"x": inp}))
Example #5
def test_device_view_dynamic_shapes(self, use_view):
    model = ONNX_MODELS["dynamic_identity"]
    # Optimization profile: min / opt / max shapes for the dynamic input "X".
    profiles = [
        Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
    ]
    runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles)))
    with runner, cuda.DeviceArray(shape=(1, 2, 3, 3), dtype=np.float32) as arr:
        inp = np.random.random_sample(size=(1, 2, 3, 3)).astype(np.float32)
        arr.copy_from(inp)
        outputs = runner.infer({"X": cuda.DeviceView(arr.ptr, arr.shape, arr.dtype) if use_view else arr})
        assert np.all(outputs["Y"] == inp)
        assert outputs["Y"].shape == (1, 2, 3, 3)
Example #6
def test_device_views(self, use_view):
    model = ONNX_MODELS["reducable"]
    network_loader = NetworkFromOnnxBytes(model.loader)
    with TrtRunner(EngineFromNetwork(network_loader)) as runner, cuda.DeviceArray((1,), dtype=np.float32) as x:
        x.copy_from(np.ones((1,), dtype=np.float32))
        outputs = runner.infer({
            # Device and host inputs can be mixed within a single feed_dict.
            "X0": x.view() if use_view else x,
            "Y0": np.ones((1,), dtype=np.float32),
        })
        assert outputs["identity_out_6"][0] == 2
        assert outputs["identity_out_8"][0] == 2
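The x.view() call above appears to be shorthand for wrapping the whole array in a DeviceView, roughly:

# Roughly equivalent to x.view() (compare Example #5):
view = cuda.DeviceView(x.ptr, x.shape, x.dtype)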
Example #7
def generate_dev_data(num_batches):
    with cuda.DeviceArray(shape=(1,), dtype=np.float32) as x:
        for _ in range(num_batches):
            x.copy_from(np.ones((1,), dtype=np.float32))
            # `mode` is a free variable supplied by the enclosing (parametrized) test.
            xdata = {
                "array": x,
                "view": cuda.DeviceView(x.ptr, x.shape, x.dtype),
                "pointer": x.ptr,
            }[mode]
            yield {
                "X0": xdata,
                "Y0": np.zeros((1,), dtype=np.float32),
            }
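Given the dictionary keys, the parametrization surrounding this snippet presumably looks something like the following hypothetical reconstruction (the test name is made up):

@pytest.mark.parametrize("mode", ["array", "view", "pointer"])
def test_calibrator_accepts_device_data(mode):  # hypothetical name
    calibrator = Calibrator(data_loader=generate_dev_data(num_batches=2))
    ...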
Example #8
def test_infer_overhead(self, copy_inputs, copy_outputs):
    inp = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32)
    dev_inp = cuda.DeviceArray(shape=inp.shape, dtype=inp.dtype).copy_from(inp)

    out = np.zeros(shape=(1, 2, 1024, 1024), dtype=np.float32)  # Using identity model!
    dev_out = cuda.DeviceArray(shape=out.shape, dtype=out.dtype)

    stream = cuda.Stream()

    model = ONNX_MODELS["dynamic_identity"]
    profiles = [
        Profile().add("X", (1, 2, 1024, 1024), (1, 2, 1024, 1024), (1, 2, 1024, 1024)),
    ]
    inp_name = list(model.input_metadata.keys())[0]

    with engine_from_network(
        network_from_onnx_bytes(model.loader), CreateConfig(profiles=profiles)
    ) as engine, engine.create_execution_context() as context, TrtRunner(context) as runner, dev_inp, dev_out:
        # Baseline: inference without the TrtRunner, calling TensorRT directly.
        def infer():
            if copy_inputs:
                dev_inp.copy_from(inp, stream=stream)
            context.execute_async_v2(bindings=[dev_inp.ptr, dev_out.ptr], stream_handle=stream.ptr)
            if copy_outputs:
                dev_out.copy_to(out, stream=stream)
            stream.synchronize()

        native_time = time_func(infer)

        feed_dict = {inp_name: (inp if copy_inputs else dev_inp)}
        runner_time = time_func(
            lambda: runner.infer(feed_dict, check_inputs=False, copy_outputs_to_host=copy_outputs)
        )

    # The overhead should be less than 0.5 ms, or the runner should be within 5% of the native time.
    print("Absolute difference: {:.5g}".format(runner_time - native_time))
    print("Relative difference: {:.5g}".format(runner_time / native_time))
    assert (runner_time - native_time) < 0.5e-3 or runner_time <= (native_time * 1.05)
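time_func is a test-harness helper that is not shown here. A minimal sketch of what it presumably does (warm up, then return mean wall-clock seconds per call):

import time

def time_func(func, warm_up=10, iters=100):
    # Hypothetical reconstruction of the helper used above.
    for _ in range(warm_up):
        func()
    start = time.perf_counter()
    for _ in range(iters):
        func()
    return (time.perf_counter() - start) / iters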
Example #9
def get_batch(self, names):
    # TensorRT calibration callback: return one device pointer per input name,
    # or None to signal that calibration data is exhausted (or that an error occurred).
    if not self.is_active:
        G_LOGGER.error("Calibrator must be activated prior to use. Please use a context manager. "
                       "For example:\nwith calibrator:\n\t# Use calibrator here")
        return None

    try:
        buffers = next(self.data_loader_iter)
    except StopIteration:
        if not self.num_batches:
            G_LOGGER.error("Calibrator data loader provided no data.\nPossible reasons for this include:\n"
                           "(1) data loader has no data to provide\n"
                           "(2) data loader was a generator, and the calibrator is being "
                           "used multiple times (generators cannot be rewound)")
        return None
    else:
        self.num_batches += 1

    if not util.check_dict_contains(buffers, names, dict_name="calibration data", log_func=G_LOGGER.error):
        return None

    ptrs = []
    for name in names:
        buf = buffers[name]

        if isinstance(buf, cuda.DeviceView):
            # Already on the device; use its pointer directly.
            ptrs.append(buf.ptr)
        elif isinstance(buf, np.ndarray):
            # Host array: lazily allocate a device buffer, then copy the data over.
            if name not in self.device_buffers:
                self.device_buffers[name] = cuda.DeviceArray(shape=buf.shape, dtype=buf.dtype)
                G_LOGGER.verbose("Allocated: {:}".format(self.device_buffers[name]))

            ptrs.append(self.device_buffers[name].copy_from(buf).ptr)
        elif isinstance(buf, int):
            # Assume a raw GPU pointer.
            ptrs.append(buf)
        else:
            G_LOGGER.error("Calibration data loader provided an unrecognized type: {:} for input: {:}.\n"
                           "Please provide either a NumPy array, Polygraphy DeviceView, or GPU pointer."
                           .format(type(buf).__name__, name))
            return None

    return ptrs
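get_batch is the callback TensorRT invokes once per calibration batch; returning None ends calibration. In Polygraphy, a calibrator built around this method is typically attached to engine building roughly like so (a sketch reusing the generator from Example #1):

from polygraphy.backend.trt import Calibrator

calibrator = Calibrator(data_loader=generate_multidata(num_batches=4))
build_engine = EngineFromNetwork(NetworkFromOnnxBytes(model.loader),
                                 config=CreateConfig(int8=True, calibrator=calibrator))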
Example #10
def make_buffers(engine):
    """
    Creates empty host and device buffers for the specified engine.
    Always uses binding names from Profile 0.
    """
    device_buffers = OrderedDict()
    host_output_buffers = OrderedDict()

    for idx in range(trt_util.get_bindings_per_profile(engine)):
        binding = engine[idx]
        dtype = trt_util.np_dtype_from_trt(engine.get_binding_dtype(binding))
        # Buffers start empty; they are resized once actual tensor shapes are known.
        device_buffers[binding] = cuda.DeviceArray(dtype=dtype)
        if not engine.binding_is_input(binding):
            host_output_buffers[binding] = np.empty(shape=tuple(), dtype=dtype)
    G_LOGGER.extra_verbose("Created device buffers: {:}".format(device_buffers))
    return device_buffers, host_output_buffers
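Note that the device buffers above are created without a shape: DeviceArray.copy_from resizes the buffer to match the host array, so real allocation is deferred until shapes are known. A small sketch:

buf = cuda.DeviceArray(dtype=np.float32)            # empty placeholder buffer
buf.copy_from(np.ones((2, 3), dtype=np.float32))    # resizes to (2, 3), then copies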