def test_conv_relu():
    """Parse the conv/relu/maxpool ONNX model, compile it for the GPU
    target, and execute it once with randomly generated arguments,
    printing the program before and after compilation."""
    prog = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
    print(prog)
    print("Compiling ...")
    prog.compile(migraphx.get_target("gpu"))
    print(prog)
    # Build one randomly generated argument per program parameter.
    params = {}
    for name, shape in prog.get_parameter_shapes().items():
        print("Parameter {} -> {}".format(name, shape))
        params[name] = migraphx.generate_argument(shape)
    result = prog.run(params)
    print(result)
import migraphx

# Parse the ONNX model and compile it for the GPU target without
# automatic host<->device copies (offload_copy=False), so arguments
# must be moved to and from the GPU explicitly below.
program = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
print(program)
print("Compiling ...")
program.compile(migraphx.get_target("gpu"), offload_copy=False)
print(program)

# Generate one random argument per parameter and place it on the GPU.
params = {}
for name, shape in program.get_parameter_shapes().items():
    print("Parameter {} -> {}".format(name, shape))
    params[name] = migraphx.to_gpu(migraphx.generate_argument(shape))

# Run on device, then copy the result back to the host for printing.
result = migraphx.from_gpu(program.run(params))
print(result)
def run(p):
    """Execute program *p* with a randomly generated argument for each
    of its parameters, returning whatever ``p.run`` returns."""
    arguments = {
        name: migraphx.generate_argument(shape)
        for name, shape in p.get_parameter_shapes().items()
    }
    return p.run(arguments)
def infer_gpu(model, device, data_type, input_size, output_size, batch_size, args):
    """Benchmark *model* (or a TRT/MIGraphX-lowered variant of it) on CUDA.

    Runs ``args.warmups`` untimed iterations followed by ``args.steps``
    timed iterations on random input of shape (batch_size, input_size),
    and returns the total timed wall time in seconds (CUDA-event based).

    NOTE(review): ``device`` and ``output_size`` are accepted but never
    used in this body — presumably kept for a shared benchmark-harness
    signature; confirm against the caller.
    """
    data = torch.randn(batch_size, input_size, device="cuda")
    if data_type == "float16":
        data = data.half()
        model_final = model.half()
        if args.use_trt:
            # Lower the half-precision model through torch-tensorrt FX.
            # NOTE(review): this branch only runs under float16 — with
            # use_trt set but data_type != "float16" the plain model is
            # benchmarked instead; confirm that is intended.
            print("Creating TRT model")
            from torch_tensorrt.fx.lower import (
                lower_to_trt,
            )
            from torch_tensorrt.fx.utils import LowerPrecision

            model_final = lower_to_trt(
                model_final,
                [data],
                max_batch_size=batch_size,
                explicit_batch_dimension=False,
                max_workspace_size=4 << 30,  # 4 GiB TRT workspace
                lower_precision=LowerPrecision.FP16,
            )
    else:
        model_final = model
    if args.use_migraphx:
        # Export the (possibly lowered) model to ONNX, then compile it
        # with MIGraphX for the GPU target. offload_copy=False means
        # arguments must be placed on the GPU explicitly (see the
        # to_gpu call in the loop below).
        torch.onnx.export(
            model_final,
            torch.randn(
                batch_size,
                input_size,
                device="cuda",
                dtype=torch.float16 if data_type == "float16" else torch.float32,
            ),
            "benchmark.onnx",
            input_names=["input"],
            output_names=["output"],
        )
        import migraphx

        migraphx_program = migraphx.parse_onnx("benchmark.onnx")
        migraphx_program.compile(migraphx.get_target("gpu"), offload_copy=False)
    # Drain any pending GPU work before timing starts.
    torch.cuda.synchronize()
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    total_time = 0.0
    for i in range(args.steps + args.warmups):
        # Fresh random input each iteration; regenerated before the
        # start event is recorded, so input creation is not timed.
        data = torch.randn(batch_size, input_size, device="cuda")
        if data_type == "float16":
            data = data.half()
        if args.use_migraphx:
            # Random GPU-resident arguments for every program parameter.
            params = {}
            for key, value in migraphx_program.get_parameter_shapes().items():
                params[key] = migraphx.to_gpu(
                    migraphx.generate_argument(value))
        # Only iterations past the warmup phase are timed.
        if i >= args.warmups:
            start_event.record()
        if args.use_migraphx:
            migraphx_program.run(params)
        else:
            model_final(data)
        if i >= args.warmups:
            if args.use_migraphx:
                # NOTE(review): synchronizing BEFORE recording end_event
                # makes the MIGraphX measurement include a full device
                # drain inside the timed span, unlike the torch path —
                # confirm this asymmetry is intentional.
                torch.cuda.synchronize()
            end_event.record()
            # Wait for end_event so elapsed_time is valid.
            torch.cuda.synchronize()
            # elapsed_time is in milliseconds; accumulate in seconds.
            total_time += start_event.elapsed_time(end_event) * 1.0e-3
    return (total_time)