def tune_and_evaluate(tuning_opt):
    """Tune conv2d kernels for the network, compile with the graph-level best
    records, and print the mean inference time on the selected device.

    Parameters
    ----------
    tuning_opt : dict
        Keyword options forwarded to ``tune_kernels``.

    Relies on module-level configuration: ``model_name``, ``batch_size``,
    ``target``, ``log_file``, ``graph_opt_sch_file``, ``pat``, ``dtype`` and
    ``input_name`` — TODO confirm these are defined at module scope.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    )

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # Select the evaluation device. Fail fast on an unknown selector
        # instead of the original behavior (two independent `if`s), which
        # left `ctx` unbound and raised a NameError further down.
        if pat == 0:
            ctx = tvm.cpu()
        elif pat == 1:
            ctx = tvm.metal()
        else:
            raise ValueError("unsupported device selector pat=%r" % (pat,))

        # upload parameters to device
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.GraphModule(lib["default"](ctx))
        module.set_input(input_name, data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print(
            "Mean inference time (std dev): %.2f ms (%.2f ms)"
            % (np.mean(prof_res), np.std(prof_res))
        )
def enabled_ctx_list():
    """Return the TVM contexts that actually exist on this machine."""
    candidates = {
        'cpu': tvm.cpu(0),
        'gpu': tvm.gpu(0),
        'cl': tvm.opencl(0),
        'metal': tvm.metal(0),
        'rocm': tvm.rocm(0),
        'vpi': tvm.vpi(0),
    }
    # Sanity check: tvm.context must resolve each name to the same context.
    for name, ctx in candidates.items():
        assert tvm.context(name, 0) == ctx
    return [ctx for ctx in candidates.values() if ctx.exist]
def verify(A, B, C, target="llvm"):
    """Build the Metal conv2d schedule and run it once on random inputs.

    Uses `s1` and the shape variables (n, h, w, ci, co, kh, kw, stride)
    from the enclosing scope.
    """
    # The MPS extern op must be registered in this build of TVM.
    if not tvm.get_global_func("tvm.contrib.mps.conv2d", True):
        print("skip because extern function is not available")
        return
    ctx = tvm.metal(0)
    func = tvm.build(s1, [A, B, C], "metal")
    rand = np.random.uniform
    buf_a = tvm.nd.array(rand(size=(n, h, w, ci)).astype(A.dtype), ctx)
    buf_b = tvm.nd.array(rand(size=(co, kh, kw, ci)).astype(B.dtype), ctx)
    buf_c = tvm.nd.array(np.zeros((n, h // stride, w // stride, co), dtype=C.dtype), ctx)
    func(buf_a, buf_b, buf_c)
def verify(A, B, D, s, target="metal"):
    """Build schedule `s` for Metal, run the MPS matmul, and check the result.

    Computes D = A @ B + 1 on the Metal device and compares it with numpy.
    Uses `n`, `l`, `m` from the enclosing scope for the input shapes.
    """
    if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
        print("skip because extern function is not available")
        return
    ctx = tvm.metal(0)
    f = tvm.build(s, [A, B, D], "metal")
    a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
    # Allocate the output with D's dtype — D is the tensor actually built;
    # the original used outer-scope `C.dtype`, an accidental closure capture.
    c = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
def enabled_ctx_list():
    """List every TVM context type and keep only those present on this host."""
    named_contexts = [
        ('cpu', tvm.cpu(0)),
        ('gpu', tvm.gpu(0)),
        ('cl', tvm.opencl(0)),
        ('metal', tvm.metal(0)),
        ('rocm', tvm.rocm(0)),
        ('vulkan', tvm.vulkan(0)),
        ('vpi', tvm.vpi(0)),
    ]
    # Each device string must map back to the same context object.
    for name, ctx in named_contexts:
        assert tvm.context(name, 0) == ctx
    enabled = []
    for _, ctx in named_contexts:
        if ctx.exist:
            enabled.append(ctx)
    return enabled
def verify(A, B, D, s, target="metal"):
    """Build schedule `s` for Metal, run the MPS matmul, and verify D = A @ B + 1.

    Shape variables `n`, `l`, `m` come from the enclosing scope.
    """
    if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
        print("skip because extern function is not available")
        return
    ctx = tvm.metal(0)
    f = tvm.build(s, [A, B, D], "metal")
    a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
    # Use the dtype of D (the output tensor we built) rather than the
    # original's outer-scope `C.dtype`, which leaked in from the caller.
    c = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
def main():
    """Download a super-resolution ONNX model, run it on a cat image via
    NNVM/TVM on Metal, and write the input and upscaled result to disk."""
    model_url = ''.join([
        'https://gist.github.com/zhreshold/',
        'bcda4716699ac97ea44f791c24310193/raw/',
        '93672b029103648953c4e5ad3ac3aadf346a4cdc/',
        'super_resolution_0.2.onnx'
    ])
    download(model_url, 'super_resolution.onnx', True)
    # now you have super_resolution.onnx on disk
    onnx_model = onnx.load('super_resolution.onnx')
    # we can load the graph as NNVM compatible model
    sym, params = nnvm.frontend.from_onnx(onnx_model)

    img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
    download(img_url, 'cat.png')
    img = Image.open('cat.png').resize((224, 224))
    # The model operates on the luminance channel only.
    img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
    img_y, img_cb, img_cr = img_ycbcr.split()
    x = np.array(img_y)[np.newaxis, np.newaxis, :, :]

    target = 'metal'
    # assume first input name is data
    input_name = sym.list_input_names()[0]
    shape_dict = {input_name: x.shape}
    graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)

    ctx = tvm.metal(0)
    dtype = 'float32'
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    output_shape = (1, 1, 672, 672)
    tvm_output = m.get_output(0, tvm.nd.empty(output_shape, dtype)).asnumpy()

    # Reassemble the upscaled Y channel with bicubically-resized Cb/Cr.
    out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L')
    out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
    out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
    result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB')
    # `scipy.misc.imsave` was removed in SciPy >= 1.2; both `img` and
    # `result` are PIL Images, so save them directly with Pillow.
    img.save('./input.jpg')
    result.save('./result.jpg')
def requires_gpu(*args):
    """Mark a test as requiring a GPU to run.

    Tests with this mark will not be run unless a gpu is present.

    Parameters
    ----------
    f : function
        Function to mark
    """
    gpu_devices = (tvm.cuda(), tvm.rocm(), tvm.opencl(), tvm.metal(), tvm.vulkan())
    _requires_gpu = [
        pytest.mark.skipif(
            not any(dev.exist for dev in gpu_devices),
            reason="No GPU present",
        ),
        *uses_gpu(),
    ]
    return _compose(args, _requires_gpu)
task.tune(tune_option) sch, args = task.apply_best(log_file) # Kill the process for measurement del measure_runner else: sch, args = task.apply_best(log_file) func = tvm.build(sch, args, target) # Check correctness a_np = np.random.uniform(size=(M, K)).astype(np.float32) b_np = np.random.uniform(size=(N, K)).astype(np.float32) c_np = np.dot(a_np, b_np.T) ctx = tvm.metal() #ctx = tvm.cpu() a_tvm = tvm.nd.array(a_np, ctx=ctx) b_tvm = tvm.nd.array(b_np, ctx=ctx) c_tvm = tvm.nd.array(c_np, ctx=ctx) func(a_tvm, b_tvm, c_tvm) # Check results np.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-3) # Evaluate execution time evaluator = func.time_evaluator(func.entry_name, ctx, min_repeat_ms=500,
def run_timing(device, platform, model, remote=None, autotvm_log=None, batch=1, runs=3, reps=5, log=None):
    """
    Run a time trial on TVM: load a model from the chosen framework, build it
    with Relay, run batches of images through it and log the timings.

    :param device: The device to run this on ('x86', 'Metal', 'arm_cpu', or a raw target string)
    :param platform: The framework to get the machine learning model from ('MXNet', 'PyTorch', 'TensorFlow')
    :param model: The machine learning model to use
    :param remote: Details about the remote device (dict with name/type/hardware/ip/port), or None for local
    :param autotvm_log: The path to the auto TVM tuning log file, or None
    :param batch: The number of pictures to run in one go
    :param runs: The number of runs to run the picture through
    :param reps: The number of times the measurement should be repeated
    :param log: The output file (an open, writable file object; closed on exit)
    """
    # Output details of run
    from cpuinfo import get_cpu_info
    from datetime import datetime
    print("\n──────────────────────────── TVMUI ────────────────────────────\n")
    log.write("TVM Time Trial\n")
    log_print(log, "Started on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    if remote is None:
        log_print(log, 'Hardware: ' + device)
        if device == 'x86':
            log_print(log, 'CPU Type: ' + get_cpu_info().get('brand_raw'))
    else:
        log_print(log, 'Remote Name: ' + remote["name"])
        log_print(log, 'Remote Device: ' + remote["type"])
        log_print(log, 'Remote Hardware: ' + remote["hardware"])
    log_print(log, 'Backend: ' + platform)
    log_print(log, 'Model: ' + model)
    log_print(log, str(batch) + " picture(s) per run")
    log_print(log, str(runs) + " run average, repeated " + str(reps) + " times.")
    if autotvm_log is None:
        log_print(log, 'AutoTVM: No\n')
    else:
        log_print(log, 'AutoTVM: Yes\n')
    # Get the model and image data
    import numpy as np
    from PIL import Image
    from tvm import relay
    import tvm
    from tvm.contrib.download import download_testdata
    print("Loading models and images...")
    pictures = get_pics(batch)
    dataset = []
    if platform == "MXNet":
        from mxnet.gluon.model_zoo.vision import get_model
        block = get_model(model, pretrained=True)
        synset_url = "".join(
            [
                "https://gist.githubusercontent.com/zhreshold/",
                "4d0b62f3d01426887599d4f7ede23ee5/raw/",
                "596b27d23537e5a1b5751d2b0481ef172f58b539/",
                "imagenet1000_clsid_to_human.txt",
            ]
        )
        synset_name = "imagenet1000_clsid_to_human.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            # NOTE(review): eval() on a downloaded file — trusted source
            # assumed here, but this would execute arbitrary code if the
            # download were tampered with.
            synset = eval(f.read())

        def transform_image(image):
            # Standard ImageNet mean/std normalization, then NCHW layout.
            image = np.array(image) - np.array([123.0, 117.0, 104.0])
            image /= np.array([58.395, 57.12, 57.375])
            image = image.transpose((2, 0, 1))
            image = image[np.newaxis, :]
            return image

        if model == 'resnet18_v1' or model == 'mobilenetv2_1.0':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((224, 224))))
            input_shape = [batch, 3, 224, 224]
        elif model == 'inceptionv3':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((299, 299))))
            input_shape = [batch, 3, 299, 299]
        else:
            raise Exception("Invalid Model")
        shape_dict = {"data": input_shape}
        mod, params = relay.frontend.from_mxnet(block, shape_dict)
        func = mod["main"]
        # Append a softmax so the raw logits become class probabilities.
        func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs)
    elif platform == "PyTorch":
        import torch
        import torchvision
        model = getattr(torchvision.models, model)(pretrained=True)
        model = model.eval()
        # We grab the TorchScripted model via tracing
        input_shape = [batch, 3, 224, 224]
        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(model, input_data).eval()
        synset_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_synsets.txt",
            ]
        )
        synset_name = "imagenet_synsets.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synsets = f.readlines()
        synsets = [x.strip() for x in synsets]
        splits = [line.split(" ") for line in synsets]
        key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits}
        class_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_classes.txt",
            ]
        )
        class_name = "imagenet_classes.txt"
        class_path = download_testdata(class_url, class_name, module="data")
        with open(class_path) as f:
            class_id_to_key = f.readlines()
        class_id_to_key = [x.strip() for x in class_id_to_key]

        def transform_image(image):
            from torchvision import transforms
            my_preprocess = transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            )
            img = my_preprocess(image)
            return np.expand_dims(img, 0)

        for img in pictures:
            dataset.append(transform_image(Image.open(img).resize((224, 224))))
        input_name = "data"
        shape_list = [(input_name, input_shape)]
        func, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    elif platform == "TensorFlow":
        import tensorflow as tf
        import os
        try:
            # TF2-style compat shim; fall back to the module itself on TF1.
            tf_compat_v1 = tf.compat.v1
        except ImportError:
            tf_compat_v1 = tf
        import tvm.relay.testing.tf as tf_testing
        # Base location for model related files.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        model_name = "classify_image_graph_def-with_shapes.pb"
        model_url = os.path.join(repo_base, model_name)
        # Image label map
        map_proto = "imagenet_2012_challenge_label_map_proto.pbtxt"
        map_proto_url = os.path.join(repo_base, map_proto)
        # Human readable text for labels
        label_map = "imagenet_synset_to_human_label_map.txt"
        label_map_url = os.path.join(repo_base, label_map)
        model_path = download_testdata(model_url, model_name, module=["tf", "InceptionV1"])
        map_proto_path = download_testdata(map_proto_url, map_proto, module="data")
        label_path = download_testdata(label_map_url, label_map, module="data")
        with tf_compat_v1.gfile.GFile(model_path, "rb") as f:
            graph_def = tf_compat_v1.GraphDef()
            graph_def.ParseFromString(f.read())
            graph = tf.import_graph_def(graph_def, name="")
            # Call the utility to import the graph definition into default graph.
            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
            # Add shapes to the graph.
            with tf_compat_v1.Session() as sess:
                graph_def = tf_testing.AddShapesToGraphDef(sess, "softmax")
        for img in pictures:
            dataset.append(np.array(Image.open(img).resize((299, 299))))
        shape_dict = {"data": [batch, 3, 299, 299]}
        dtype_dict = {"DecodeJpeg/contents": "uint8"}
        # NOTE(review): this branch never assigns `input_shape` (or `func`),
        # which run_tvm / the build step below read — looks like a latent
        # NameError on the TensorFlow path; confirm against callers.
        mod, params = relay.frontend.from_tensorflow(graph_def, layout=None, shape=shape_dict)
    else:
        raise Exception('Not Supported!')
    # Build the graph
    if device == 'x86':
        target = "llvm"
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + target)
    elif device == 'Metal':
        target = "metal"
        ctx = tvm.metal(0)
        log_print(log, 'Target: ' + target)
    elif device == 'arm_cpu':
        target = tvm.target.arm_cpu(remote["type"])
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + remote["type"])
    else:
        # Unrecognized name: treat it as a raw TVM target string.
        target = device
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + device)
    log_print(log, 'Actual Model: ' + model + '\n')
    print('Making the graph...')
    if autotvm_log is not None:
        from tvm import autotvm
        log_print(log, 'Using AutoTVM file ' + autotvm_log)
        # Apply graph-level tuning records while compiling.
        with autotvm.apply_graph_best(autotvm_log):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(func, target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(func, target, params=params)
    print("\nSetting up TVM...")
    from tvm.contrib import graph_runtime
    # Remote upload
    if remote is not None:
        from tvm import rpc
        from tvm.contrib import utils, graph_runtime as runtime
        print("Exporting graph...")
        tmp = utils.tempdir()
        lib_fname = tmp.relpath("net.tar")
        lib.export_library(lib_fname)
        print("Connecting to device...")
        # `remote` is rebound from the config dict to the live RPC session.
        remote = rpc.connect(str(remote["ip"]), int(remote["port"]))
        print("Uploading to device...")
        remote.upload(lib_fname)
        lib = remote.load_module("net.tar")
        if device == 'x86':
            ctx = remote.cpu(0)
        elif device == 'Metal':
            ctx = remote.metal(0)
        elif device == 'arm_cpu':
            ctx = remote.cpu(0)
        else:
            ctx = remote.cpu(0)
    dtype = "float32"
    m = graph_runtime.GraphModule(lib["default"](ctx))

    def run_tvm(pics, number, repeat):
        """
        Runs a single inference and gives back the time
        :param pics: The image(s) to run
        :param number: The number of times to run the inference
        :param repeat: The number of times to repeat the measurement
        :return: An array with the time and the result
        """
        # combine pictures into one batched input array
        arr = np.ndarray(shape=input_shape, dtype=dtype)
        p = 0
        for ip in pics:
            arr[p] = ip.astype(dtype)
            p = p + 1
        m.set_input("data", tvm.nd.array(arr))
        # Actually run inference
        time = m.module.time_evaluator("run", ctx, number=number, repeat=repeat)()
        # Get output
        res = []
        if platform == 'MXNet':
            for i in range(len(pics)):
                res.append(synset[np.argmax(m.get_output(0).asnumpy()[i])])
        if platform == 'PyTorch':
            # Get top-1 result for TVM
            for i in range(len(pics)):
                top1_tvm = np.argmax(m.get_output(0).asnumpy()[i])
                tvm_class_key = class_id_to_key[top1_tvm]
                res.append(key_to_classname[tvm_class_key])
        if platform == 'TensorFlow':
            # InceptionV1 emits 1008 logits; map the top id to its label.
            pre = np.squeeze(m.get_output(0, tvm.nd.empty(((1, 1008)), "float32")).asnumpy())
            node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, uid_lookup_path=label_path)
            top_k = pre.argsort()[-5:][::-1]
            res = node_lookup.id_to_string(top_k[0])
        return [time, res]

    # Run the inferences
    output = []
    total = 0
    print("\nRunning inferences...")
    for i in range(int(len(dataset) / batch)):
        log_print(log, "\nSet " + str(i + 1) + ":")
        inp = []
        # Create the next batch
        for j in range(batch):
            inp.append(dataset[batch * i + j])
        # Run inference here
        output = run_tvm(inp, runs, reps)
        # Output results
        e = 0
        for rl in output[1]:
            log_print(log, "Image " + str(e + 1) + " Path: " + pictures[batch * i + e])
            log_print(log, "Image " + str(e + 1) + " ID: " + rl)
            e = e + 1
        log_print(log, "Time taken: " + str('%.2f' % (1000 * output[0].mean)) + " ms")
        total = total + output[0].mean
    ave = total / int(len(dataset) / batch)
    log_print(log, '\nAVERAGE TIME: ' + str(ave * 1000) + " ms")
    log_print(log, "Finished on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    log.close()
    return