Code example #1
import migraphx


def test_conv_relu():
    # Parse the ONNX file into a MIGraphX program.
    p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
    print(p)
    print("Compiling ...")
    # Compile for the GPU target; the default offload_copy=True inserts
    # host<->device copies automatically.
    p.compile(migraphx.get_target("gpu"))
    print(p)
    params = {}

    # Generate an argument of the right shape for every program parameter.
    for key, value in p.get_parameter_shapes().items():
        print("Parameter {} -> {}".format(key, value))
        params[key] = migraphx.generate_argument(value)

    r = p.run(params)
    print(r)
Code example #2
import migraphx

p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
print(p)
print("Compiling ...")
# With offload_copy=False the program expects its inputs to already be
# on the GPU, and its outputs stay there.
p.compile(migraphx.get_target("gpu"), offload_copy=False)
print(p)
params = {}

for key, value in p.get_parameter_shapes().items():
    print("Parameter {} -> {}".format(key, value))
    # Copy each generated argument to the GPU by hand.
    params[key] = migraphx.to_gpu(migraphx.generate_argument(value))

# Run on the GPU, then copy the result back to the host.
r = migraphx.from_gpu(p.run(params))
print(r)
Code example #3
import migraphx


def run(p):
    # Build an argument map covering every parameter of the compiled
    # program, then execute it.
    params = {}
    for key, value in p.get_parameter_shapes().items():
        params[key] = migraphx.generate_argument(value)

    return p.run(params)
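
A minimal usage sketch for the helper above, continuing directly from its definition. It assumes the program is compiled with the default offload_copy=True, so the generated host-side arguments are copied to the device automatically; the ONNX filename is just a placeholder:

p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
p.compile(migraphx.get_target("gpu"))
print(run(p))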
Code example #4
import torch


def infer_gpu(model, device, data_type, input_size, output_size, batch_size,
              args):
    data = torch.randn(batch_size, input_size, device="cuda")

    if data_type == "float16":
        data = data.half()
        model_final = model.half()
        if args.use_trt:
            print("Creating TRT model")
            from torch_tensorrt.fx.lower import lower_to_trt
            from torch_tensorrt.fx.utils import LowerPrecision
            # Lower the FP16 model to a TensorRT engine via the FX path.
            model_final = lower_to_trt(
                model_final,
                [data],
                max_batch_size=batch_size,
                explicit_batch_dimension=False,
                max_workspace_size=4 << 30,  # 4 GiB
                lower_precision=LowerPrecision.FP16,
            )
    else:
        model_final = model

    if args.use_migraphx:
        # Export the model to ONNX, then parse and compile it with MIGraphX.
        torch.onnx.export(
            model_final,
            torch.randn(batch_size,
                        input_size,
                        device="cuda",
                        dtype=torch.float16
                        if data_type == "float16" else torch.float32),
            "benchmark.onnx",
            input_names=["input"],
            output_names=["output"],
        )
        import migraphx
        migraphx_program = migraphx.parse_onnx("benchmark.onnx")
        # offload_copy=False: arguments must be placed on the GPU manually.
        migraphx_program.compile(migraphx.get_target("gpu"),
                                 offload_copy=False)

    torch.cuda.synchronize()
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    total_time = 0.0

    for i in range(args.steps + args.warmups):
        data = torch.randn(batch_size, input_size, device="cuda")

        if data_type == "float16":
            data = data.half()

        if args.use_migraphx:
            # The MIGraphX path feeds freshly generated device arguments
            # rather than the PyTorch tensor created above.
            params = {}
            for key, value in migraphx_program.get_parameter_shapes().items():
                params[key] = migraphx.to_gpu(
                    migraphx.generate_argument(value))

        # Only time the iterations after the warmup phase.
        if i >= args.warmups:
            start_event.record()

        if args.use_migraphx:
            migraphx_program.run(params)
        else:
            model_final(data)

        if i >= args.warmups:
            if args.use_migraphx:
                torch.cuda.synchronize()
            end_event.record()
            torch.cuda.synchronize()
            # elapsed_time() returns milliseconds; accumulate seconds.
            total_time += start_event.elapsed_time(end_event) * 1.0e-3

    return total_time
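
A sketch of how infer_gpu might be driven. The argparse flags and the linear model below are assumptions, chosen only to match the fields of args that the function actually reads (use_trt, use_migraphx, steps, warmups); the sizes are placeholders:

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument("--use_trt", action="store_true")
parser.add_argument("--use_migraphx", action="store_true")
parser.add_argument("--steps", type=int, default=100)
parser.add_argument("--warmups", type=int, default=10)
args = parser.parse_args()

# Hypothetical model; any module taking (batch_size, input_size) works.
model = torch.nn.Linear(1024, 1024).to("cuda")
elapsed = infer_gpu(model, "cuda", "float16", 1024, 1024, 32, args)
print("Total time over {} timed steps: {:.4f} s".format(args.steps, elapsed))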