def test_loader_explicit_precision(self):
    """The network built with explicit_precision=True should have an explicit batch
    dimension, and (on TensorRT < 8.0) report explicit precision."""
    loaded = network_from_onnx_bytes(ONNX_MODELS["identity"].loader, explicit_precision=True)
    builder, network, parser = loaded
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        # NOTE(review): the explicit-precision flag appears to be checked only
        # pre-8.0 — presumably it was deprecated/removed in TRT 8.0; confirm.
        trt_is_pre_8 = mod.version(trt.__version__) < mod.version("8.0")
        if trt_is_pre_8:
            assert network.has_explicit_precision
def test_infer_overhead(self, copy_inputs, copy_outputs):
    """Benchmark TrtRunner.infer() against a hand-rolled execute_async_v2 loop and
    assert the runner adds at most 0.5ms of absolute overhead, or stays within 5%
    of the native time.

    ``copy_inputs``/``copy_outputs`` are presumably parametrized booleans
    controlling whether host<->device copies are included in the timed work —
    TODO confirm against the fixture/parametrize decorator (not visible here).
    """
    inp = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32)
    # Pre-allocate device buffers so allocation cost is excluded from timing.
    dev_inp = cuda.DeviceArray(shape=inp.shape, dtype=inp.dtype).copy_from(inp)
    out = np.zeros(shape=(1, 2, 1024, 1024), dtype=np.float32)  # Using identity model!
    dev_out = cuda.DeviceArray(shape=out.shape, dtype=out.dtype)
    stream = cuda.Stream()
    model = ONNX_MODELS["dynamic_identity"]
    # Single static-shape optimization profile pinning "X" to the input shape above.
    profiles = [
        Profile().add("X", (1, 2, 1024, 1024), (1, 2, 1024, 1024), (1, 2, 1024, 1024)),
    ]
    inp_name = list(model.input_metadata.keys())[0]
    with engine_from_network(
        network_from_onnx_bytes(model.loader), CreateConfig(profiles=profiles)
    ) as engine, engine.create_execution_context() as context, TrtRunner(context) as runner, dev_inp, dev_out:
        # Inference outside the TrtRunner
        def infer():
            if copy_inputs:
                dev_inp.copy_from(inp, stream=stream)
            context.execute_async_v2(bindings=[dev_inp.ptr, dev_out.ptr], stream_handle=stream.ptr)
            if copy_outputs:
                dev_out.copy_to(out, stream=stream)
            # Synchronize so the measured time includes the GPU work, not just enqueue.
            stream.synchronize()

        native_time = time_func(infer)

        # When not copying inputs, feed the device array directly to skip the H2D copy.
        feed_dict = {inp_name: (inp if copy_inputs else dev_inp)}
        runner_time = time_func(
            lambda: runner.infer(feed_dict, check_inputs=False, copy_outputs_to_host=copy_outputs)
        )

        # The overhead should be less than 0.5ms, or the runtime should be within 5%
        print("Absolute difference: {:.5g}".format(runner_time - native_time))
        print("Relative difference: {:.5g}".format(runner_time / native_time))
        assert (runner_time - native_time) < 0.5e-3 or runner_time <= (native_time * 1.05)
def reshape_network():
    """Fixture-style generator: yield (builder, network, parser) parsed from the
    "reshape" ONNX model, releasing all three on teardown."""
    parsed = network_from_onnx_bytes(ONNX_MODELS["reshape"].loader)
    builder, network, parser = parsed
    with builder, network, parser:
        yield builder, network, parser
def identity_identity_network():
    """Fixture-style generator: yield (builder, network, parser) parsed from the
    "identity_identity" ONNX model, releasing all three on teardown."""
    parsed = network_from_onnx_bytes(ONNX_MODELS["identity_identity"].loader)
    builder, network, parser = parsed
    with builder, network, parser:
        yield builder, network, parser
def test_loader(self):
    """A default (non-explicit-precision) network should have neither an implicit
    batch dimension nor explicit precision."""
    loaded = network_from_onnx_bytes(ONNX_MODELS["identity"].loader)
    builder, network, parser = loaded
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        assert not network.has_explicit_precision
def multi_input_builder_network():
    """Fixture-style generator: yield only (builder, network) from the "reducable"
    ONNX model; the parser is still kept alive by the ``with`` until teardown."""
    parsed = network_from_onnx_bytes(ONNX_MODELS["reducable"].loader)
    builder, network, parser = parsed
    with builder, network, parser:
        yield builder, network