def test_error_on_wrong_dtype_feed_dict(self):
    """Feeding an int32 buffer where the engine expects a different dtype must raise."""
    loader = NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader)
    with TrtRunner(EngineFromNetwork(loader)) as runner:
        bad_input = np.ones(shape=(1, 1, 2, 2), dtype=np.int32)
        with pytest.raises(PolygraphyException, match="unexpected dtype."):
            runner.infer({"x": bad_input})
def test_loader_explicit_precision(self):
    """An explicit-precision network must report explicit batch and explicit precision."""
    loader = NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader, explicit_precision=True)
    builder, network, parser = func.invoke(loader)
    with builder, network, parser:
        assert not network.has_implicit_batch_dimension
        assert network.has_explicit_precision
def test_device_buffer_order_matches_bindings(self):
    """The runner's device buffers must be ordered exactly like the engine bindings."""
    model = ONNX_MODELS["reducable"]
    engine = engine_from_network(NetworkFromOnnxBytes(model.loader))
    with engine, TrtRunner(engine) as runner:
        # Iterating the engine yields binding names; dict keys preserve insertion order.
        for binding, buffer_name in zip(engine, runner.device_buffers.keys()):
            assert binding == buffer_name
def test_basic(self):
    """Runner is active inside the context manager and inactive after exit."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        assert runner.is_active
        model.check_runner(runner)
    assert not runner.is_active
def test_context(self):
    """A runner built from an execution context owns the context but not the engine."""
    model = ONNX_MODELS["identity"]
    built_engine = engine_from_network(NetworkFromOnnxBytes(model.loader))
    with built_engine, TrtRunner(built_engine.create_execution_context) as runner:
        model.check_runner(runner)
        assert runner.owns_context
        assert not runner.owns_engine
def test_cannot_use_device_view_shape_tensor(self):
    """Shape-tensor inputs must reside in host memory; device arrays must be rejected."""
    model = ONNX_MODELS["empty_tensor_expand"]
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(model.loader))
    with TrtRunner(engine_loader) as runner, cuda.DeviceArray(shape=(5,), dtype=np.int32) as dev_shape:
        feed_dict = {"data": np.ones((2, 0, 3, 0), dtype=np.float32), "new_shape": dev_shape}
        with pytest.raises(PolygraphyException, match="it must reside in host memory"):
            runner.infer(feed_dict)
def test_no_output_copy(self):
    """With copy_outputs_to_host=False, outputs remain on-device as DeviceViews."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        host_input = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)
        outputs = runner.infer({"x": host_input}, copy_outputs_to_host=False)
        result = outputs["y"]
        assert isinstance(result, cuda.DeviceView)
        # Pulling the device view back to host must reproduce the identity input.
        assert np.array_equal(result.numpy(), host_input)
def test_error_on_wrong_name_feed_dict(self, names, err):
    """A feed dict with wrong tensor names must raise with the parametrized message."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        feed_dict = {name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) for name in names}
        with pytest.raises(PolygraphyException, match=err):
            runner.infer(feed_dict)
def test_multirun_outputs_are_different(self):
    """Two data-loader iterations should feed distinct data, so outputs must differ."""
    runner = TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader)))
    run_results = Comparator.run([runner], data_loader=DataLoader(iterations=2))
    first = run_results[runner.name][0]
    second = run_results[runner.name][1]
    for name in first.keys():
        assert np.any(first[name] != second[name])
def test_basic(self):
    """A loader-built runner owns its engine/context; deactivation clears cached metadata."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        assert runner.is_active
        assert runner.owns_engine and runner.owns_context
        model.check_runner(runner)
    assert not runner.is_active
    # Leaving the context must also drop the cached input metadata.
    assert runner._cached_input_metadata is None
def test_basic(self):
    """A loader-built runner owns its engine/context and records an inference time."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        assert runner.is_active
        assert runner.owns_engine and runner.owns_context
        model.check_runner(runner)
        # check_runner performed inference, so a timing must now be available.
        assert runner.last_inference_time() is not None
    assert not runner.is_active
def test_empty_tensor_with_dynamic_input_shape_tensor(self):
    """Zero-volume tensors must work when driven by a dynamic shape-tensor input."""
    model = ONNX_MODELS["empty_tensor_expand"]
    target_shapes = [(1, 2, 0, 3, 0), (2, 2, 0, 3, 0), (4, 2, 0, 3, 0)]
    config_loader = CreateConfig(profiles=[Profile().add("new_shape", *target_shapes)])
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(model.loader), config_loader)
    with TrtRunner(engine_loader) as runner:
        for target in target_shapes:
            model.check_runner(runner, {"new_shape": target})
def test_multithreaded_runners_from_engine(self):
    """Two runners sharing a single engine must be usable from separate threads."""
    model = ONNX_MODELS["identity"]
    engine = engine_from_network(NetworkFromOnnxBytes(model.loader))
    with engine, TrtRunner(engine) as first, TrtRunner(engine) as second:
        workers = [
            threading.Thread(target=model.check_runner, args=(runner,))
            for runner in (first, second)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
def test_subsequent_infers_with_different_input_types(self):
    """Host arrays and device arrays must be accepted interchangeably across calls."""
    model = ONNX_MODELS["identity"]
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader))) as runner:
        inp = np.ones(shape=(1, 1, 2, 2), dtype=np.float32)

        def expect_identity(outputs):
            # Identity model: output must equal the input exactly.
            assert np.all(outputs["y"] == inp)

        expect_identity(runner.infer({"x": inp}))
        expect_identity(runner.infer({"x": cuda.DeviceArray().copy_from(inp)}))
        expect_identity(runner.infer({"x": inp}))
def test_device_view_dynamic_shapes(self, use_view):
    """Dynamic-shape inference must accept device inputs, as DeviceArray or DeviceView."""
    model = ONNX_MODELS["dynamic_identity"]
    profiles = [Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4))]
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(model.loader), CreateConfig(profiles=profiles))
    with TrtRunner(engine_loader) as runner, cuda.DeviceArray(shape=(1, 2, 3, 3), dtype=np.float32) as dev_arr:
        inp = np.random.random_sample(size=(1, 2, 3, 3)).astype(np.float32)
        dev_arr.copy_from(inp)
        feed = cuda.DeviceView(dev_arr.ptr, dev_arr.shape, dev_arr.dtype) if use_view else dev_arr
        outputs = runner.infer({"X": feed})
        assert np.all(outputs["Y"] == inp)
        assert outputs["Y"].shape == (1, 2, 3, 3)
def test_device_views(self, use_view):
    """Device-resident inputs may be mixed with host arrays in the same feed dict."""
    model = ONNX_MODELS["reducable"]
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(model.loader))
    with TrtRunner(engine_loader) as runner, cuda.DeviceArray((1,), dtype=np.float32) as dev_x:
        dev_x.copy_from(np.ones((1,), dtype=np.float32))
        feed_dict = {
            "X0": dev_x.view() if use_view else dev_x,
            "Y0": np.ones((1,), dtype=np.float32),
        }
        outputs = runner.infer(feed_dict)
        # reducable model sums the two unit inputs along both paths.
        assert outputs["identity_out_6"][0] == 2
        assert outputs["identity_out_8"][0] == 2
def test_multiple_profiles(self):
    """Switching to the second optimization profile must allow its shape range."""
    model = ONNX_MODELS["dynamic_identity"]
    second_profile_shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
    config_loader = CreateConfig(
        profiles=[
            Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
            Profile().add("X", *second_profile_shapes),
        ]
    )
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), config_loader)) as runner:
        runner.context.active_optimization_profile = 1
        for shape in second_profile_shapes:
            model.check_runner(runner, {"X": shape})
def test_dim_param_trt_onnxrt(self):
    """ONNX-Runtime and TRT must agree on a model that uses dim_param dynamic dims."""
    load_onnx_bytes = ONNX_MODELS["dim_param"].loader
    runners = [
        OnnxrtRunner(SessionFromOnnx(load_onnx_bytes)),
        TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(load_onnx_bytes))),
    ]
    run_results = Comparator.run(runners)
    # Shape checks are only enabled on TRT >= 7.0.
    compare_func = CompareFunc.simple(check_shapes=mod.version(trt.__version__) >= mod.version("7.0"))
    assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func))
    assert len(list(run_results.values())[0]) == 1  # Default number of iterations
def build_engine(model, cache):
    """Build an engine for `model`, loading from and saving to a timing cache.

    NOTE(review): `path_mode` is a free variable captured from the enclosing
    scope. When it is falsy, `cache` appears to be a named file-like object
    (it exposes both `.seek()` and `.name`) — confirm against the caller.
    The seeks rewind the file-like cache before and after the build.
    """
    if not path_mode:
        cache.seek(0)
    network_loader = NetworkFromOnnxBytes(ONNX_MODELS[model].loader)
    # In non-path_mode, use the file-like object directly.
    # Must load the cache with CreateConfig so that new data is appended
    # instead of overwriting the previous cache.
    # NOTE(review): load always goes through `cache.name`, while save uses the
    # file-like object in non-path_mode — presumably intentional; verify.
    loader = EngineFromNetwork(
        network_loader,
        CreateConfig(load_timing_cache=cache.name),
        save_timing_cache=cache.name if path_mode else cache,
    )
    # Invoking the loader builds (and discards) the engine; we only care about
    # the timing-cache side effects.
    with loader():
        pass
    if not path_mode:
        cache.seek(0)
def test_multiple_profiles(self):
    """set_profile() must switch between optimization profiles correctly."""
    model = ONNX_MODELS["dynamic_identity"]
    shapes_by_profile = [
        [(1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)],
        [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)],
    ]
    profiles = [Profile().add("X", *shapes) for shapes in shapes_by_profile]
    config_loader = CreateConfig(profiles=profiles)
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(model.loader), config_loader)
    with TrtRunner(engine_loader) as runner:
        for index, shapes in enumerate(shapes_by_profile):
            runner.set_profile(index)
            for shape in shapes:
                model.check_runner(runner, {"X": shape})
def test_multiple_runners(self):
    """TF, ONNX-Runtime, and TRT runners must all agree on the identity model."""
    load_tf = TF_MODELS["identity"].loader
    load_serialized_onnx = BytesFromOnnx(OnnxFromTfGraph(load_tf))
    runners = [
        TfRunner(SessionFromGraph(load_tf)),
        OnnxrtRunner(SessionFromOnnxBytes(load_serialized_onnx)),
        TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(load_serialized_onnx))),
    ]
    run_results = Comparator.run(runners)
    # Shape checks are only enabled on TRT >= 7.0.
    compare_func = CompareFunc.basic_compare_func(check_shapes=version(trt.__version__) >= version("7.0"))
    assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func))
    assert len(list(run_results.values())[0]) == 1  # Default number of iterations
def test_multiple_profiles(self):
    """Selecting the second optimization profile must work across TRT versions."""
    model = ONNX_MODELS["dynamic_identity"]
    second_profile_shapes = [(1, 2, 4, 4), (1, 2, 8, 8), (1, 2, 16, 16)]
    config_loader = CreateConfig(
        profiles=[
            Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4)),
            Profile().add("X", *second_profile_shapes),
        ]
    )
    with TrtRunner(EngineFromNetwork(NetworkFromOnnxBytes(model.loader), config_loader)) as runner:
        # TRT >= 7.3 requires the async profile-selection API; older versions
        # use the synchronous attribute setter.
        if misc.version(trt.__version__) >= misc.version("7.3"):
            runner.context.set_optimization_profile_async(1, runner.stream.address())
        else:
            runner.context.active_optimization_profile = 1
        for shape in second_profile_shapes:
            model.check_runner(runner, {"X": shape})
def modifiable_reshape_network():
    """Return a network loader (not a parsed network) for the reshape model.

    The network is modified each time it is loaded, so every use must start
    from a fresh parse — hence a loader is returned instead of a network.
    """
    return NetworkFromOnnxBytes(ONNX_MODELS["reshape"].loader)
def identity_engine():
    """Yield a built TensorRT engine for the identity model."""
    engine_loader = EngineFromNetwork(
        NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader), CreateConfig()
    )
    with engine_loader() as engine:
        yield engine
def test_onnx_like_from_network(self, model_name):
    """onnx_like_from_network must produce a truthy result for each model."""
    network_loader = NetworkFromOnnxBytes(ONNX_MODELS[model_name].loader)
    assert onnx_like_from_network(network_loader)
def identity_builder_network():
    """Yield (builder, network) for the identity model; all parser objects are freed on teardown."""
    load_network = NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader)
    builder, network, parser = load_network()
    with builder, network, parser:
        yield builder, network
def test_can_build_with_parser_owning(self):
    """Engine building must succeed when the network loader owns the parser."""
    build_engine = EngineFromNetwork(NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader))
    with build_engine():
        pass
def test_shape_output(self):
    """A model with a shape-dependent output (reshape) must run correctly."""
    model = ONNX_MODELS["reshape"]
    network_loader = NetworkFromOnnxBytes(model.loader)
    engine = engine_from_network(network_loader)
    with engine, TrtRunner(engine.create_execution_context) as runner:
        model.check_runner(runner)
def identity_identity_network():
    """Return a network loader for the identity_identity model."""
    loader = NetworkFromOnnxBytes(ONNX_MODELS["identity_identity"].loader)
    return loader
def reshape_network():
    """Return a network loader for the reshape model."""
    loader = NetworkFromOnnxBytes(ONNX_MODELS["reshape"].loader)
    return loader