# NOTE(review): whitespace-mangled fragment — the original line breaks and
# indentation were lost, and the snippet is truncated (the final `else:` has
# no body and the `try:` has no visible `except`), so it cannot be restored
# to runnable code from this view alone. Kept byte-identical below.
#
# What the visible code does: for each line of the form "<filename> <label>",
# it opens the image from `imagedir`, converts non-RGB images to RGB by
# pasting into a new RGB canvas, preprocesses to a tensor, adds a batch
# dimension, and runs it through a MIGraphX model (input parameter key '0',
# host<->device copies via to_gpu/from_gpu). It then ranks the 1000-class
# output with argsort and classifies the ground-truth label as
# 'first'..'fifth' depending on whether it lands in the top-5 positions
# (indices 999 down to 995 of the ascending sort) — i.e. a top-5 accuracy
# tally. `maxval = result.argmax()` is computed but not used in the visible
# portion. Presumably `line`, `imagedir`, `preprocess`, `params`, and
# `model` are set up earlier in the file — not visible here; confirm.
while line: token = line.split() filename = token[0] label = token[1] img_pil = Image.open(imagedir + "/" + filename) if img_pil.mode != 'RGB': img_pil2 = Image.new("RGB", img_pil.size) img_pil2.paste(img_pil) img_pil = img_pil2 img_tensor = preprocess(img_pil) img_tensor.unsqueeze_(0) img_variable = Variable(img_tensor) image = img_variable.numpy() try: tmp_result = migraphx.to_gpu(migraphx.argument(image)) params['0'] = tmp_result result = np.array(migraphx.from_gpu(model.run(params)), copy=False) maxval = result.argmax() maxlist = result.argsort() if int(label) == maxlist[0][999]: ilabel = 'first' elif int(label) == maxlist[0][998]: ilabel = 'second' elif int(label) == maxlist[0][997]: ilabel = 'third' elif int(label) == maxlist[0][996]: ilabel = 'fourth' elif int(label) == maxlist[0][995]: ilabel = 'fifth' else:
# Whole-script example: load an ONNX model, compile it for the GPU with
# device-resident buffers (offload_copy=False), feed randomly generated
# device-side arguments, and print the result copied back to the host.
import migraphx

p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
print(p)

print("Compiling ...")
p.compile(migraphx.get_target("gpu"), offload_copy=False)
print(p)

# Build one device-side argument per program parameter, filled with
# random data, printing each parameter's shape along the way.
params = {}
for name, shape in p.get_parameter_shapes().items():
    print("Parameter {} -> {}".format(name, shape))
    params[name] = migraphx.to_gpu(migraphx.generate_argument(shape))

r = migraphx.from_gpu(p.run(params))
print(r)
def run(p):
    """Execute compiled MIGraphX program ``p`` once with random inputs.

    A fresh random device-side argument is generated for every parameter
    the program expects; the result is copied back to the host and
    returned.
    """
    # NOTE(review): relies on a module-level `import migraphx` elsewhere in
    # the file, and the explicit to_gpu/from_gpu copies assume `p` was
    # compiled without offload_copy — confirm against the caller.
    params = {
        name: migraphx.to_gpu(migraphx.generate_argument(shape))
        for name, shape in p.get_parameter_shapes().items()
    }
    return migraphx.from_gpu(p.run(params))
# NOTE(review): whitespace-mangled fragment, kept byte-identical below — the
# original line breaks and indentation were lost, and it begins mid way
# through an `if framework == ...` dispatch (the `elif` here has no visible
# `if`), so it cannot be emitted as standalone valid code from this view.
#
# Visible behavior: the first statements finish a TensorFlow branch started
# above this fragment — stamp the end time, take argmax of the first row of
# `y_out`, and print the predicted index and elapsed seconds. The
# `elif framework == 'migraphx':` branch then parses the saved TF graph with
# MIGraphX, optionally quantizes to fp16, compiles for GPU with
# device-resident buffers (offload_copy=False), pre-allocates one GPU buffer
# per parameter, loads the image batch, runs `repeat` untimed warm-up
# inferences, then `repeat` timed inferences (host->device copy of the input
# included in the timed region), and prints the same index/time summary.
# NOTE(review): parameters other than 'input' keep whatever
# migraphx.allocate_gpu produced — presumably they are weights baked into
# the compiled graph or don't affect the output; verify.
finish_time = time.time() result = np.array(y_out) idx = np.argmax(result[0]) print('Tensorflow: ') print('IDX = ', idx) print('Time = ', '{:8.3f}'.format(finish_time - start_time)) elif framework == 'migraphx': import migraphx graph = migraphx.parse_tf(save_file) if fp16: graph.quantize_fp16() graph.compile(migraphx.get_target("gpu"), offload_copy=False) # allocate space with random params params = {} for key, value in graph.get_parameter_shapes().items(): params[key] = migraphx.allocate_gpu(value) image = load_image(image_file, batch) for i in range(repeat): params['input'] = migraphx.to_gpu(migraphx.argument(image)) result = np.array(migraphx.from_gpu(graph.run(params)), copy=False) start_time = time.time() for i in range(repeat): params['input'] = migraphx.to_gpu(migraphx.argument(image)) result = np.array(migraphx.from_gpu(graph.run(params)), copy=False) finish_time = time.time() idx = np.argmax(result[0]) print('MIGraphX: ') print('IDX = ', idx) print('Time = ', '{:8.3f}'.format(finish_time - start_time))
def infer_gpu(model, device, data_type, input_size, output_size, batch_size, args):
    """Benchmark ``model`` on the GPU and return the measured time in seconds.

    Depending on ``args``, inference runs either through PyTorch
    (optionally lowered to TensorRT when ``args.use_trt`` and fp16) or
    through MIGraphX after exporting the model to ONNX
    (``args.use_migraphx``).  ``args.warmups`` untimed iterations precede
    ``args.steps`` timed ones; per-iteration GPU time is accumulated via
    CUDA events.  ``device`` and ``output_size`` are accepted for
    interface compatibility but are not used in the visible body.
    """
    use_fp16 = data_type == "float16"

    # Build the model to benchmark (and a sample input for TRT lowering).
    sample = torch.randn(batch_size, input_size, device="cuda")
    if use_fp16:
        sample = sample.half()
        model_final = model.half()
        if args.use_trt:
            print("Creating TRT model")
            from torch_tensorrt.fx.lower import lower_to_trt
            from torch_tensorrt.fx.utils import LowerPrecision

            model_final = lower_to_trt(
                model_final,
                [sample],
                max_batch_size=batch_size,
                explicit_batch_dimension=False,
                max_workspace_size=4 << 30,
                lower_precision=LowerPrecision.FP16,
            )
    else:
        model_final = model

    if args.use_migraphx:
        # Round-trip through ONNX so MIGraphX can parse and compile the graph.
        export_dtype = torch.float16 if use_fp16 else torch.float32
        torch.onnx.export(
            model_final,
            torch.randn(batch_size, input_size, device="cuda", dtype=export_dtype),
            "benchmark.onnx",
            input_names=["input"],
            output_names=["output"],
        )
        import migraphx

        migraphx_program = migraphx.parse_onnx("benchmark.onnx")
        migraphx_program.compile(migraphx.get_target("gpu"), offload_copy=False)

    torch.cuda.synchronize()
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    total_time = 0.0

    for step in range(args.steps + args.warmups):
        # Fresh random input every iteration.
        batch = torch.randn(batch_size, input_size, device="cuda")
        if use_fp16:
            batch = batch.half()
        if args.use_migraphx:
            # The MIGraphX path ignores `batch`; it gets freshly generated
            # random device-side arguments for every parameter instead.
            params = {
                key: migraphx.to_gpu(migraphx.generate_argument(shape))
                for key, shape in migraphx_program.get_parameter_shapes().items()
            }

        timed = step >= args.warmups
        if timed:
            start_event.record()

        if args.use_migraphx:
            migraphx_program.run(params)
        else:
            model_final(batch)

        if timed:
            if args.use_migraphx:
                # NOTE(review): extra device drain before stamping the end
                # event — presumably because MIGraphX work is not enqueued on
                # the torch stream the events live on; confirm.
                torch.cuda.synchronize()
            end_event.record()
            torch.cuda.synchronize()
            # elapsed_time() reports milliseconds; accumulate seconds.
            total_time += start_event.elapsed_time(end_event) * 1.0e-3

    return total_time