Esempio n. 1
0
    def tune_and_evaluate(tuning_opt):
        """Tune the conv2d workloads of the network, build it and time inference.

        The network is obtained from ``get_network(model_name, batch_size)``.
        Its ``nn.conv2d`` tasks are tuned with ``tune_kernels`` and
        ``tune_graph``; the module is then compiled under the graph-level best
        records and the mean / standard deviation of the inference time is
        printed in milliseconds.

        Parameters
        ----------
        tuning_opt : dict
            Keyword arguments forwarded unchanged to ``tune_kernels``.

        Raises
        ------
        ValueError
            If the device selector ``pat`` is neither 0 (CPU) nor 1 (Metal).
        """
        # Resolve the execution device before doing any work, so that an
        # unsupported ``pat`` fails immediately instead of leaving ``ctx``
        # unbound after the tuning has already been run.
        if pat == 0:
            ctx = tvm.cpu()
        elif pat == 1:
            ctx = tvm.metal()
        else:
            raise ValueError(
                "unsupported device selector pat=%r (expected 0 or 1)" % (pat,)
            )

        # extract workloads from relay program
        print("Extract tasks...")
        mod, params, data_shape, out_shape = get_network(model_name, batch_size)
        tasks = autotvm.task.extract_from_program(
            mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
        )

        # run tuning tasks
        tune_kernels(tasks, **tuning_opt)
        tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

        # compile kernels with graph-level best records
        with autotvm.apply_graph_best(graph_opt_sch_file):
            print("Compile...")
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build_module.build(mod, target=target, params=params)

            # upload parameters to device
            data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
            module = runtime.GraphModule(lib["default"](ctx))
            module.set_input(input_name, data_tvm)

            # evaluate
            print("Evaluate inference time cost...")
            ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
            prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
            print(
                "Mean inference time (std dev): %.2f ms (%.2f ms)"
                % (np.mean(prof_res), np.std(prof_res))
            )
Esempio n. 2
0
def enabled_ctx_list():
    """Return the TVM contexts (device 0) whose ``exist`` flag is set.

    Every candidate is first checked against ``tvm.context(name, 0)`` so a
    mismatch between the short name and the constructor is caught early.
    """
    candidates = [
        ('cpu', tvm.cpu(0)),
        ('gpu', tvm.gpu(0)),
        ('cl', tvm.opencl(0)),
        ('metal', tvm.metal(0)),
        ('rocm', tvm.rocm(0)),
        ('vpi', tvm.vpi(0)),
    ]
    # Sanity pass over all candidates before any of them is filtered out.
    for name, ctx in candidates:
        assert tvm.context(name, 0) == ctx
    return [ctx for _, ctx in candidates if ctx.exist]
Esempio n. 3
0
 def verify(A, B, C, target="llvm"):
     """Build ``s1`` for Metal and run it once on random inputs.

     Returns early, after printing a notice, when the packed function
     ``tvm.contrib.mps.conv2d`` is not registered. The ``target`` argument is
     accepted but not used: the build target is always ``"metal"``. The
     output is only computed, not compared against a reference.
     """
     mps_conv2d = tvm.get_global_func("tvm.contrib.mps.conv2d", True)
     if not mps_conv2d:
         print("skip because extern function is not available")
         return

     metal_ctx = tvm.metal(0)
     built = tvm.build(s1, [A, B, C], "metal")

     # Shapes come from the enclosing scope (n, h, w, ci, co, kh, kw, stride).
     data_shape = (n, h, w, ci)
     kernel_shape = (co, kh, kw, ci)
     out_shape = (n, h // stride, w // stride, co)

     data_nd = tvm.nd.array(
         np.random.uniform(size=data_shape).astype(A.dtype), metal_ctx)
     kernel_nd = tvm.nd.array(
         np.random.uniform(size=kernel_shape).astype(B.dtype), metal_ctx)
     out_nd = tvm.nd.array(np.zeros(out_shape, dtype=C.dtype), metal_ctx)
     built(data_nd, kernel_nd, out_nd)
Esempio n. 4
0
 def verify(A, B, D, s, target="metal"):
     """Build schedule ``s`` for Metal and check ``D`` against NumPy.

     Returns early, after printing a notice, when the packed function
     ``tvm.contrib.mps.matmul`` is not registered. Otherwise random ``(n, l)``
     and ``(l, m)`` inputs are multiplied on the Metal device and the result
     is asserted to equal ``a @ b + 1`` within ``rtol=1e-5``.

     ``target`` is accepted but not used; the build target is always
     ``"metal"``.
     """
     if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
         print("skip because extern function is not available")
         return
     ctx = tvm.metal(0)
     f = tvm.build(s, [A, B, D], "metal")
     a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
     b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
     # The third buffer is bound to D, so its dtype must come from D itself
     # rather than from an unrelated ``C`` in the enclosing scope.
     c = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
     f(a, b, c)
     tvm.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
Esempio n. 5
0
def enabled_ctx_list():
    """List the device-0 TVM contexts that report ``exist``.

    Covers cpu, gpu, cl, metal, rocm, vulkan and vpi. Each context is first
    asserted to equal ``tvm.context(name, 0)`` for its short name.
    """
    named_contexts = (
        ('cpu', tvm.cpu(0)),
        ('gpu', tvm.gpu(0)),
        ('cl', tvm.opencl(0)),
        ('metal', tvm.metal(0)),
        ('rocm', tvm.rocm(0)),
        ('vulkan', tvm.vulkan(0)),
        ('vpi', tvm.vpi(0)),
    )
    names, contexts = zip(*named_contexts)
    # Check every name/constructor pair before filtering on availability.
    for short_name, expected in zip(names, contexts):
        assert tvm.context(short_name, 0) == expected
    return [candidate for candidate in contexts if candidate.exist]
Esempio n. 6
0
 def verify(A, B, D, s, target="metal"):
     """Run the MPS matmul schedule ``s`` on Metal and validate the output.

     Prints a notice and returns when ``tvm.contrib.mps.matmul`` is not
     registered. Otherwise builds ``s`` over ``[A, B, D]``, feeds random
     ``(n, l)`` and ``(l, m)`` operands and asserts that the result equals
     ``np.dot(a, b) + 1`` within ``rtol=1e-5``.

     ``target`` is accepted but not used; the build target is always
     ``"metal"``.
     """
     if not tvm.get_global_func("tvm.contrib.mps.matmul", True):
         print("skip because extern function is not available")
         return
     ctx = tvm.metal(0)
     f = tvm.build(s, [A, B, D], "metal")
     a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
     b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
     # This buffer receives D, so allocate it with D's dtype instead of
     # reaching for a ``C`` that is not one of this function's arguments.
     c = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
     f(a, b, c)
     expected = np.dot(a.asnumpy(), b.asnumpy()) + 1
     tvm.testing.assert_allclose(c.asnumpy(), expected, rtol=1e-5)
Esempio n. 7
0
def main():
    """Run the ONNX super-resolution example on the Metal target.

    Downloads the model and a test picture, compiles the model with NNVM for
    ``metal``, runs it on the luminance channel of the 224x224 picture and
    writes the resized input to ``./input.jpg`` and the recombined output to
    ``./result.jpg``.
    """
    model_url = ''.join([
        'https://gist.github.com/zhreshold/',
        'bcda4716699ac97ea44f791c24310193/raw/',
        '93672b029103648953c4e5ad3ac3aadf346a4cdc/',
        'super_resolution_0.2.onnx'
    ])
    download(model_url, 'super_resolution.onnx', True)
    # now you have super_resolution.onnx on disk
    onnx_model = onnx.load('super_resolution.onnx')
    # we can load the graph as NNVM compatible model
    sym, params = nnvm.frontend.from_onnx(onnx_model)

    img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
    download(img_url, 'cat.png')
    img = Image.open('cat.png').resize((224, 224))
    img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
    img_y, img_cb, img_cr = img_ycbcr.split()
    # Only the Y channel is fed to the network, as a 1x1xHxW array.
    x = np.array(img_y)[np.newaxis, np.newaxis, :, :]

    target = 'metal'
    # assume first input name is data
    input_name = sym.list_input_names()[0]
    shape_dict = {input_name: x.shape}
    graph, lib, params = nnvm.compiler.build(sym,
                                             target,
                                             shape_dict,
                                             params=params)

    ctx = tvm.metal(0)
    dtype = 'float32'
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    output_shape = (1, 1, 672, 672)
    tvm_output = m.get_output(0, tvm.nd.empty(output_shape, dtype)).asnumpy()

    out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)),
                            mode='L')
    # The chroma channels are not processed by the network; they are simply
    # resized to the output resolution before merging.
    out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
    out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
    result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB')

    # scipy.misc.imsave is no longer available in current SciPy releases.
    # Both objects are already PIL images, so save them directly. JPEG has
    # no alpha channel, hence the explicit RGB conversion of the input.
    img.convert('RGB').save('./input.jpg')
    result.save('./result.jpg')
Esempio n. 8
0
def requires_gpu(*args):
    """Mark a test as requiring a GPU to run.

    The test is skipped with the reason "No GPU present" when none of the
    CUDA, ROCm, OpenCL, Metal or Vulkan devices reports that it exists. The
    marks returned by ``uses_gpu()`` are applied as well.

    Parameters
    ----------
    *args
        Forwarded unchanged to ``_compose`` together with the marks.
        NOTE(review): presumably the function to mark - confirm against
        ``_compose``.
    """
    any_gpu_present = (
        tvm.cuda().exist
        or tvm.rocm().exist
        or tvm.opencl().exist
        or tvm.metal().exist
        or tvm.vulkan().exist
    )
    skip_without_gpu = pytest.mark.skipif(
        not any_gpu_present,
        reason="No GPU present",
    )
    gpu_marks = [skip_without_gpu, *uses_gpu()]
    return _compose(args, gpu_marks)
Esempio n. 9
0
        task.tune(tune_option)
        sch, args = task.apply_best(log_file)

        # Kill the process for measurement
        del measure_runner
    else:
        sch, args = task.apply_best(log_file)

    func = tvm.build(sch, args, target)

    # Check correctness
    a_np = np.random.uniform(size=(M, K)).astype(np.float32)
    b_np = np.random.uniform(size=(N, K)).astype(np.float32)
    c_np = np.dot(a_np, b_np.T)

    ctx = tvm.metal()
    #ctx = tvm.cpu()

    a_tvm = tvm.nd.array(a_np, ctx=ctx)
    b_tvm = tvm.nd.array(b_np, ctx=ctx)
    c_tvm = tvm.nd.array(c_np, ctx=ctx)

    func(a_tvm, b_tvm, c_tvm)

    # Check results
    np.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-3)

    # Evaluate execution time
    evaluator = func.time_evaluator(func.entry_name,
                                    ctx,
                                    min_repeat_ms=500,
Esempio n. 10
0
def run_timing(device, platform, model, remote=None, autotvm_log=None, batch=1, runs=3, reps=5, log=None):
    """
    Run a time trial on TVM

    Loads a pretrained model through the chosen frontend, compiles it with
    Relay for the requested device (optionally uploading it to a remote RPC
    device), runs the pictures returned by ``get_pics`` through it in batches
    and writes the per-batch and average timings to ``log``.

    :param device: The device to run this on ('x86', 'Metal' or 'arm_cpu'; any
        other value is handed to TVM as the target string)
    :param platform: The platform to get the machine learning model from
        ('MXNet', 'PyTorch' or 'TensorFlow')
    :param model: The name of the machine learning model to use
    :param remote: Details about the remote device (a mapping with the keys
        "name", "type", "hardware", "ip" and "port"), or None to run locally
    :param autotvm_log: The path to the auto TVM file
    :param batch: The number of pictures to run in one go
    :param runs: The number of runs to run the picture through
    :param reps: The number of times the measurement should be repeated
    :param log: The output file. Despite the None default it has to be an open,
        writable file object: it is written to unconditionally and closed
        before returning
    :raises Exception: If the platform is not supported, or if the MXNet model
        is not one of the models this function knows the input size of
    """

    # Output details of run
    from cpuinfo import get_cpu_info
    from datetime import datetime

    print("\n──────────────────────────── TVMUI ────────────────────────────\n")
    log.write("TVM Time Trial\n")
    log_print(log, "Started on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    if remote is None:
        log_print(log, 'Hardware: ' + device)
        if device == 'x86':
            # .get() yields None when the key is absent, which would make the
            # string concatenation fail; fall back to a placeholder instead.
            log_print(log, 'CPU Type: ' + str(get_cpu_info().get('brand_raw', 'unknown')))
    else:
        log_print(log, 'Remote Name: ' + remote["name"])
        log_print(log, 'Remote Device: ' + remote["type"])
        log_print(log, 'Remote Hardware: ' + remote["hardware"])
    log_print(log, 'Backend: ' + platform)
    log_print(log, 'Model: ' + model)
    log_print(log, str(batch) + " picture(s) per run")
    log_print(log, str(runs) + " run average, repeated " + str(reps) + " times.")
    if autotvm_log is None:
        log_print(log, 'AutoTVM: No\n')
    else:
        log_print(log, 'AutoTVM: Yes\n')

    # Get the model and image data
    import numpy as np
    from PIL import Image
    from tvm import relay
    import tvm
    from tvm.contrib.download import download_testdata

    print("Loading models and images...")

    pictures = get_pics(batch)
    dataset = []

    if platform == "MXNet":
        import ast
        from mxnet.gluon.model_zoo.vision import get_model

        block = get_model(model, pretrained=True)

        synset_url = "".join(
            [
                "https://gist.githubusercontent.com/zhreshold/",
                "4d0b62f3d01426887599d4f7ede23ee5/raw/",
                "596b27d23537e5a1b5751d2b0481ef172f58b539/",
                "imagenet1000_clsid_to_human.txt",
            ]
        )
        synset_name = "imagenet1000_clsid_to_human.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            # The downloaded file is a Python dict literal. Parse it as a
            # literal rather than eval()-ing content fetched from the network.
            synset = ast.literal_eval(f.read())

        def transform_image(image):
            image = np.array(image) - np.array([123.0, 117.0, 104.0])
            image /= np.array([58.395, 57.12, 57.375])
            image = image.transpose((2, 0, 1))
            image = image[np.newaxis, :]
            return image

        # The only difference between the supported models is the input size.
        if model in ('resnet18_v1', 'mobilenetv2_1.0'):
            side = 224
        elif model == 'inceptionv3':
            side = 299
        else:
            raise Exception("Invalid Model")
        for img in pictures:
            dataset.append(transform_image(Image.open(img).resize((side, side))))
        input_shape = [batch, 3, side, side]

        shape_dict = {"data": input_shape}

        mod, params = relay.frontend.from_mxnet(block, shape_dict)
        func = mod["main"]
        func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs)

    elif platform == "PyTorch":
        import torch
        import torchvision

        # Keep ``model`` bound to the model *name*: it is concatenated into a
        # log line further down, which fails if it is rebound to the module.
        torch_model = getattr(torchvision.models, model)(pretrained=True)
        torch_model = torch_model.eval()

        # We grab the TorchScripted model via tracing
        input_shape = [batch, 3, 224, 224]
        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(torch_model, input_data).eval()

        synset_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_synsets.txt",
            ]
        )
        synset_name = "imagenet_synsets.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synsets = f.readlines()
        synsets = [x.strip() for x in synsets]
        splits = [line.split(" ") for line in synsets]
        key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits}

        class_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_classes.txt",
            ]
        )
        class_name = "imagenet_classes.txt"
        class_path = download_testdata(class_url, class_name, module="data")
        with open(class_path) as f:
            class_id_to_key = f.readlines()
        class_id_to_key = [x.strip() for x in class_id_to_key]

        def transform_image(image):
            from torchvision import transforms

            my_preprocess = transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            )
            img = my_preprocess(image)
            return np.expand_dims(img, 0)

        for img in pictures:
            dataset.append(transform_image(Image.open(img).resize((224, 224))))

        input_name = "data"
        shape_list = [(input_name, input_shape)]
        func, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    elif platform == "TensorFlow":
        import tensorflow as tf

        try:
            tf_compat_v1 = tf.compat.v1
        except (ImportError, AttributeError):
            # A missing ``compat.v1`` shows up as AttributeError on the
            # attribute access above, so that has to be caught as well.
            tf_compat_v1 = tf
        import tvm.relay.testing.tf as tf_testing

        # Base location for model related files.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        model_name = "classify_image_graph_def-with_shapes.pb"
        # URLs are joined by plain concatenation (repo_base ends with "/");
        # os.path.join would use the platform path separator.
        model_url = repo_base + model_name

        # Image label map
        map_proto = "imagenet_2012_challenge_label_map_proto.pbtxt"
        map_proto_url = repo_base + map_proto

        # Human readable text for labels
        label_map = "imagenet_synset_to_human_label_map.txt"
        label_map_url = repo_base + label_map

        model_path = download_testdata(model_url, model_name, module=["tf", "InceptionV1"])
        map_proto_path = download_testdata(map_proto_url, map_proto, module="data")
        label_path = download_testdata(label_map_url, label_map, module="data")

        with tf_compat_v1.gfile.GFile(model_path, "rb") as f:
            graph_def = tf_compat_v1.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name="")
            # Call the utility to import the graph definition into default graph.
            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
            # Add shapes to the graph.
            with tf_compat_v1.Session() as sess:
                graph_def = tf_testing.AddShapesToGraphDef(sess, "softmax")
        # NOTE(review): the pictures appended here are presumably
        # height x width x channel arrays, while the "data" input below is
        # declared channel-first, so run_tvm cannot copy them into its batch
        # buffer as-is. Confirm the graph's real input name, layout and dtype
        # before relying on this branch.
        for img in pictures:
            dataset.append(np.array(Image.open(img).resize((299, 299))))
        # run_tvm sizes its batch buffer from input_shape, so it has to be
        # defined in this branch too.
        input_shape = [batch, 3, 299, 299]
        shape_dict = {"data": input_shape}
        mod, params = relay.frontend.from_tensorflow(graph_def, layout=None, shape=shape_dict)
        # The build step below compiles ``func``; hand it the imported module.
        func = mod
    else:
        raise Exception('Not Supported!')

    # Build the graph
    if device == 'x86':
        target = "llvm"
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + target)
    elif device == 'Metal':
        target = "metal"
        ctx = tvm.metal(0)
        log_print(log, 'Target: ' + target)
    elif device == 'arm_cpu':
        target = tvm.target.arm_cpu(remote["type"])
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + remote["type"])
    else:
        target = device
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + device)
    log_print(log, 'Actual Model: ' + model + '\n')
    print('Making the graph...')
    if autotvm_log is not None:
        from tvm import autotvm
        log_print(log, 'Using AutoTVM file ' + autotvm_log)
        with autotvm.apply_graph_best(autotvm_log):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(func, target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(func, target, params=params)

    print("\nSetting up TVM...")
    from tvm.contrib import graph_runtime

    # Remote upload
    if remote is not None:
        from tvm import rpc
        from tvm.contrib import utils
        print("Exporting graph...")
        tmp = utils.tempdir()
        lib_fname = tmp.relpath("net.tar")
        lib.export_library(lib_fname)
        print("Connecting to device...")
        session = rpc.connect(str(remote["ip"]), int(remote["port"]))
        print("Uploading to device...")
        session.upload(lib_fname)
        lib = session.load_module("net.tar")
        # Only Metal gets its own remote context; every other device value
        # runs on the remote CPU.
        if device == 'Metal':
            ctx = session.metal(0)
        else:
            ctx = session.cpu(0)
    dtype = "float32"
    m = graph_runtime.GraphModule(lib["default"](ctx))

    def run_tvm(pics, number, repeat):
        """
        Runs a single inference and gives back the time

        :param pics: The images(s) to run
        :param number: The number of times to run the inference
        :param repeat:  The number of times to repeat the measurement
        :return: A two-element list: the time evaluator result and the list of
            predicted labels
        """

        # combine pictures
        arr = np.empty(shape=input_shape, dtype=dtype)
        for slot, pic in enumerate(pics):
            arr[slot] = pic.astype(dtype)
        m.set_input("data", tvm.nd.array(arr))

        # Actually run inference
        timing = m.module.time_evaluator("run", ctx, number=number, repeat=repeat)()

        # Get output
        res = []
        if platform == 'MXNet':
            # Fetch the output once instead of once per picture.
            scores = m.get_output(0).asnumpy()
            res = [synset[np.argmax(scores[i])] for i in range(len(pics))]
        elif platform == 'PyTorch':
            # Get top-1 result for TVM
            scores = m.get_output(0).asnumpy()
            for i in range(len(pics)):
                tvm_class_key = class_id_to_key[np.argmax(scores[i])]
                res.append(key_to_classname[tvm_class_key])
        elif platform == 'TensorFlow':
            pre = np.squeeze(m.get_output(0, tvm.nd.empty(((1, 1008)), "float32")).asnumpy())
            node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, uid_lookup_path=label_path)
            top_k = pre.argsort()[-5:][::-1]
            # Wrap the label in a list: the caller iterates over the results
            # and would otherwise walk the string character by character.
            res = [node_lookup.id_to_string(top_k[0])]
        return [timing, res]

    # Run the inferences
    total = 0
    num_sets = len(dataset) // batch

    print("\nRunning inferences...")
    for i in range(num_sets):
        log_print(log, "\nSet " + str(i + 1) + ":")
        # Create the next batch
        inp = dataset[batch * i:batch * (i + 1)]
        # Run inference here
        output = run_tvm(inp, runs, reps)
        # Output results
        for e, rl in enumerate(output[1]):
            log_print(log, "Image " + str(e + 1) + " Path: " + pictures[batch * i + e])
            log_print(log, "Image " + str(e + 1) + " ID: " + rl)
        log_print(log, "Time taken: " + str('%.2f' % (1000 * output[0].mean)) + " ms")
        total = total + output[0].mean
    if num_sets:
        ave = total / num_sets
        log_print(log, '\nAVERAGE TIME: ' + str(ave * 1000) + " ms")
    else:
        # Fewer pictures than one full batch: nothing was timed, so there is
        # no average to compute (and dividing would raise).
        log_print(log, '\nAVERAGE TIME: n/a (no complete batch was run)')
    log_print(log, "Finished on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    log.close()