Example #1
 def test_loader_explicit_precision(self):
     """Parsing with explicit_precision=True must produce an explicit-batch network."""
     components = network_from_onnx_bytes(
         ONNX_MODELS["identity"].loader, explicit_precision=True)
     builder, network, parser = components
     with builder, network, parser:
         assert not network.has_implicit_batch_dimension
         # Only check the explicit-precision flag on older TensorRT; the
         # guard suggests the flag changed meaning in 8.0 -- TODO confirm.
         if mod.version(trt.__version__) < mod.version("8.0"):
             assert network.has_explicit_precision
Example #2
    def test_infer_overhead(self, copy_inputs, copy_outputs):
        """Bound TrtRunner.infer() overhead against hand-rolled TensorRT execution.

        Times the same inference twice -- once via raw execute_async_v2 with
        manual buffer management, once via TrtRunner.infer -- and asserts the
        runner adds less than 0.5 ms or stays within 5% of the native time.
        copy_inputs/copy_outputs toggle host<->device transfers (presumably
        parametrized by the test framework -- TODO confirm).
        """
        # Host-side input and matching pre-allocated device buffer.
        inp = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32)
        dev_inp = cuda.DeviceArray(shape=inp.shape, dtype=inp.dtype).copy_from(inp)

        out = np.zeros(shape=(1, 2, 1024, 1024), dtype=np.float32)  # Using identity model!
        dev_out = cuda.DeviceArray(shape=out.shape, dtype=out.dtype)

        stream = cuda.Stream()

        model = ONNX_MODELS["dynamic_identity"]
        # The model has dynamic shapes, so pin min/opt/max to one fixed shape.
        profiles = [
            Profile().add("X", (1, 2, 1024, 1024), (1, 2, 1024, 1024), (1, 2, 1024, 1024)),
        ]
        inp_name = list(model.input_metadata.keys())[0]

        # The with-statement owns engine, context, runner, and both device
        # buffers so everything is released even if an assertion fires.
        with engine_from_network(
            network_from_onnx_bytes(model.loader), CreateConfig(profiles=profiles)
        ) as engine, engine.create_execution_context() as context, TrtRunner(context) as runner, dev_inp, dev_out:
            # Inference outside the TrtRunner
            def infer():
                if copy_inputs:
                    dev_inp.copy_from(inp, stream=stream)
                context.execute_async_v2(bindings=[dev_inp.ptr, dev_out.ptr], stream_handle=stream.ptr)
                if copy_outputs:
                    dev_out.copy_to(out, stream=stream)
                stream.synchronize()

            native_time = time_func(infer)

            # Feed the host array when the runner should copy inputs itself,
            # otherwise hand it the already-populated device buffer.
            feed_dict = {inp_name: (inp if copy_inputs else dev_inp)}
            runner_time = time_func(
                lambda: runner.infer(feed_dict, check_inputs=False, copy_outputs_to_host=copy_outputs)
            )

        # The overhead should be less than 0.5ms, or the runtime should be within 5%
        print("Absolute difference: {:.5g}".format(runner_time - native_time))
        print("Relative difference: {:.5g}".format(runner_time / native_time))
        assert (runner_time - native_time) < 0.5e-3 or runner_time <= (native_time * 1.05)
Example #3
def reshape_network():
    """Yield a freshly parsed TensorRT network for the "reshape" ONNX model.

    Yields the (builder, network, parser) triple; all three are released
    when the generator resumes after the yield.
    """
    trt_builder, trt_network, onnx_parser = network_from_onnx_bytes(ONNX_MODELS["reshape"].loader)
    with trt_builder, trt_network, onnx_parser:
        yield trt_builder, trt_network, onnx_parser
Example #4
def identity_identity_network():
    """Yield a freshly parsed TensorRT network for the "identity_identity" ONNX model.

    Yields the (builder, network, parser) triple; all three are released
    when the generator resumes after the yield.
    """
    trt_builder, trt_network, onnx_parser = network_from_onnx_bytes(ONNX_MODELS["identity_identity"].loader)
    with trt_builder, trt_network, onnx_parser:
        yield trt_builder, trt_network, onnx_parser
Example #5
 def test_loader(self):
     """Default parse: the network is explicit-batch and has no explicit precision."""
     components = network_from_onnx_bytes(ONNX_MODELS["identity"].loader)
     builder, network, parser = components
     with builder, network, parser:
         assert not network.has_implicit_batch_dimension
         assert not network.has_explicit_precision
Example #6
def multi_input_builder_network():
    """Yield (builder, network) parsed from the "reducable" ONNX model.

    The parser is deliberately not yielded, but the with-statement keeps it
    alive (and frees it) alongside the builder and network.
    """
    trt_builder, trt_network, onnx_parser = network_from_onnx_bytes(ONNX_MODELS["reducable"].loader)
    with trt_builder, trt_network, onnx_parser:
        yield trt_builder, trt_network