Example #1
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                              params=params,
                                              ops=(relay.op.get("nn.conv2d"),))

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
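This example assumes a `tune_kernels` helper that is not shown. A minimal sketch, following the kernel-tuning loop from the TVM x86 tuning tutorial; the keyword names expected from `tuning_opt` are assumptions:

# A minimal sketch of the tune_kernels helper assumed above (parameter
# names are illustrative and follow the TVM x86 tutorial).
def tune_kernels(tasks, measure_option=None, tuner="gridsearch",
                 early_stopping=None, log_filename="tuning.log"):
    from tvm.autotvm.tuner import GridSearchTuner, RandomTuner
    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        tuner_obj = GridSearchTuner(task) if tuner == "gridsearch" else RandomTuner(task)
        n_trial = len(task.config_space)
        # Measure each candidate schedule and append the results to the log.
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[autotvm.callback.progress_bar(n_trial, prefix=prefix),
                                  autotvm.callback.log_to_file(log_filename)])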
Example #2
def tune_and_evaluate(mod, params, input_shape, dtype, measure_top_n, target,
                      tuning_opt, graph_log_file, best_results_file):
    """Tune a model with the ranking model and evaluate the performance."""

    sys.stderr.write("Extract conv2d tasks...\n")
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params)
    # Run tuning tasks.
    if graph_log_file is not None and not os.path.exists(graph_log_file):
        best_results = tune_kernels(tasks, True, measure_top_n, **tuning_opt)
        tune_graph(mod["main"], input_shape[1], target,
                   tuning_opt['log_filename'], graph_log_file)
    else:
        best_results = tune_kernels(tasks, False, measure_top_n, **tuning_opt)
    with open(best_results_file, 'w') as of:
        json.dump(best_results, of)

    dispatch_ctx = tvm.autotvm.task.DispatchContext.current

    if graph_log_file is not None and os.path.exists(graph_log_file):
        sys.stderr.write("Compile model with graph tuning...\n")
        tvm.autotvm.task.DispatchContext.current = autotvm.apply_graph_best(
            graph_log_file)
    elif os.path.exists(tuning_opt['log_filename']):
        sys.stderr.write("Compile model without graph tuning...\n")
        tvm.autotvm.task.DispatchContext.current = autotvm.apply_history_best(
            tuning_opt['log_filename'])
    else:
        sys.stderr.write("Compile model with fallback + tophub...\n")

    compile_engine.get().clear()
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build_module.build(mod,
                                                      target=target,
                                                      params=params)
    tvm.autotvm.task.DispatchContext.current = dispatch_ctx

    # Load parameters.
    ctx = tvm.context(str(target), 0)
    module = runtime.create(graph, lib, ctx)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape[1])).astype(dtype))
    module.set_input(input_shape[0], data_tvm)
    module.set_input(**params)

    # Evaluate performance.
    sys.stderr.write("Evaluate inference time cost...\n")
    ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    sys.stderr.write("Median inference time: %.2f ms\n" % np.median(prof_res))
Example #3
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    onnx_model = onnx.load('facenet.onnx')
    print("Extract tasks...")
    input_name = 'input_input'
    input_data = np.random.rand(1, 3, 112, 112)  # avoid shadowing the built-in input()
    shape_dict = {input_name: input_data.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    data_shape = (1, 3, 112, 112)
    output_shape = (1, 512)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ))

    print("run tuning tasks")
    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod,
                                                          target=target,
                                                          params=params)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
        path_lib = "deploy/lib/facenet_tuneing_cpu.so"
        lib.export_library(path_lib)
        fo = open("facenet_tuneing_cpu.json", "w")
        fo.write(graph)
        fo.close()
        fo = open("facenet_cpu.params", "wb")
        fo.write(relay.save_param_dict(params))
        fo.close()
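The exported artifacts can be loaded back without rebuilding. A sketch assuming a recent TVM (`tvm.runtime.load_module`; older releases use `tvm.module.load`) and the same `runtime` alias for `tvm.contrib.graph_runtime` the example uses:

# Reload the compiled library, graph JSON, and parameters (a sketch).
loaded_lib = tvm.runtime.load_module("deploy/lib/facenet_tuneing_cpu.so")
with open("facenet_tuneing_cpu.json") as f:
    loaded_graph = f.read()
with open("facenet_cpu.params", "rb") as f:
    loaded_params = bytearray(f.read())
module = runtime.create(loaded_graph, loaded_lib, tvm.cpu())
module.load_params(loaded_params)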
Example #4
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    # why "main"?
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d, ))

    # run tuning tasks
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod,
                                                          target=target,
                                                          params=params)
            # Alternatively: relay.build(mod, target=target, target_host=target, params=params)

        base_path = './lib'
        path_lib = os.path.join(base_path, "deploy_lib.so")
        lib.export_library(path_lib)
        with open(os.path.join(base_path, "deploy_graph.json"), "w") as fo:
            fo.write(graph)
        with open(os.path.join(base_path, "deploy_param.params"), "wb") as fo:
            fo.write(relay.save_param_dict(params))

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=(1, 380, 380, 1))).astype(dtype))
        #         data_tvm = preprocess(img_path)
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
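Several of these examples also assume a `tune_graph` helper. A sketch following the graph tuner from the TVM x86 tutorial (`input_name` and `target` are the same module-level globals the examples rely on):

from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner

def tune_graph(graph, dshape, records, opt_sch_file, use_DP=True):
    # Choose the dynamic-programming tuner; PBQP scales better on large graphs.
    target_op = [relay.op.get("nn.conv2d")]
    Tuner = DPTuner if use_DP else PBQPTuner
    executor = Tuner(graph, {input_name: dshape}, records, target_op, target)
    # Benchmark layout-transformation costs, search for the best end-to-end
    # combination of kernel schedules, and write it to opt_sch_file.
    executor.benchmark_layout_transform(min_exec_num=2000)
    executor.run()
    executor.write_opt_sch2record_file(opt_sch_file)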
Example #5
def benchmark(network, batch_size, dtype, target, log_prefix, repeat):
    layout = "NCHW"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)

    if use_graph_tuner(network, batch_size, dtype, target):
        log_file = log_prefix + ".graph.log"
        history_best_context = autotvm.apply_graph_best(log_file)
    else:
        log_file = log_prefix + ".kernel.log"
        history_best_context = autotvm.apply_history_best(log_file)

    assert os.path.exists(
        log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          min_repeat_ms=500,
                                          repeat=repeat)
    return np.array(ftimer().results)
Example #6
def benchmark(network, target, log_prefix):
    mod, params, input_shape, output_shape = get_network(network)
    # convert to NCHW
    desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
    seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
                                    relay.transform.ConvertLayout(desired_layouts)])
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)

    if network in ["bert"]:
        log_file = log_prefix + "_kernel.log"
        with autotvm.apply_history_best(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build_module.build(mod, target=target, params=params)

            # upload parameters to device
            ctx = tvm.context(str(target), 0)
            data_tvm = tvm.nd.array((np.random.uniform(size=input_shape[0])).astype(dtype))
            token_types_tvm = tvm.nd.array(np.random.uniform(size=input_shape[1]).astype(dtype))
            valid_length_tvm = tvm.nd.array(np.random.uniform(size=input_shape[2]).astype(dtype))
            module = runtime.GraphModule(lib["default"](ctx))
            module.set_input(data0=data_tvm, data1=token_types_tvm, data2=valid_length_tvm)
    else:
        log_file = log_prefix + "_graph.log"
        with autotvm.apply_graph_best(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build_module.build(mod, target=target, params=params)

            # upload parameters to device
            ctx = tvm.context(str(target), 0)
            data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
            module = runtime.GraphModule(lib["default"](ctx))
            module.set_input(args.inputname, data_tvm)

    # evaluate
    print("Evaluate...")
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=args.repeat)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print(
        "%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
    )
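Example 6 reads `args.inputname`, `args.repeat`, and a module-level `dtype` that are defined elsewhere. A hypothetical wiring that would satisfy them (flag names and defaults are assumptions, not the original code):

import argparse

# Hypothetical globals assumed by the benchmark above.
parser = argparse.ArgumentParser()
parser.add_argument("--network", type=str, default="resnet-50")
parser.add_argument("--inputname", type=str, default="data")
parser.add_argument("--repeat", type=int, default=3)
args = parser.parse_args()
dtype = "float32"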
Example #7
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(batch_size)
    # tasks = autotvm.task.extract_from_program(mod["main"], target=target,
    #                                           params=params, ops=(relay.op.nn.conv2d,))
    #
    # # run tuning tasks
    # print("Tuning...")
    # tune_kernels(tasks, **tuning_opt)
    # tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file, use_DP=False)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with relay.build_config(opt_level=1):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        temp = util.tempdir()
        path_lib = temp.relpath(lib_dir)
        lib.export_library(path_lib)
        with open(temp.relpath(graph_dir), "w") as fo:
            fo.write(graph)
        with open(temp.relpath(params_dir), "wb") as fo:
            fo.write(relay.save_param_dict(params))

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #8
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ))

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels in default mode
    print("Evaluation of the network compiled in 'default' mode without auto-tuning:")
    with tvm.transform.PassContext(opt_level=3):
        print("Compile...")
        lib = relay.build(mod, target=target, params=params)
        evaluate_performance(lib, data_shape)

    # compile kernels in kernel tuned only mode
    print("\nEvaluation of the network been tuned on kernel level:")
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=target, params=params)
        evaluate_performance(lib, data_shape)

    # compile kernels with graph-level best records
    print("\nEvaluation of the network been tuned on graph level:")
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)
        evaluate_performance(lib, data_shape)
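The `evaluate_performance` helper is assumed but not shown. A minimal sketch consistent with the measurement code in the other examples, assuming `runtime` is `tvm.contrib.graph_executor` (older releases: `graph_runtime`) so that `lib["default"]` works with the factory module returned by `relay.build`:

def evaluate_performance(lib, data_shape):
    # Upload parameters to the device and feed random input data.
    dev = tvm.cpu()
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.GraphModule(lib["default"](dev))
    module.set_input(input_name, data_tvm)

    # Evaluate inference time.
    ftimer = module.module.time_evaluator("run", dev, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)"
          % (np.mean(prof_res), np.std(prof_res)))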
Example #9
    # dynamic to static (may be useful)
    mod = relay.transform.DynamicToStatic()(mod)
    print(mod.astext(show_meta_data=False))

    # print("Extract tasks...")
    # tasks = autotvm.task.extract_from_program(
    #     mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    # ) # 例程上,target = "llvm"
    # for i, task in enumerate(tasks):
    #     print(len(task.config_space))

    # tune_kernels(tasks, **tuning_option) # tuning
    # tune_graph(mod["main"], data_shape, 'unet_cpu_2_thread.log', graph_opt_sch_file,exec_num = 1000) # tuning

    # # #只需要得到这个opt_sch_file就可
    with autotvm.apply_graph_best(graph_opt_sch_file):  # graph_opt_sch_file
        print("compile...")
        with tvm.transform.PassContext(opt_level=3):  # set < 3
            # lib = relay.build_module.build(mod,target,params = params)
            lib = relay.build(mod, target, params=params)
        # m = graph_executor.GraphModule(lib["default"](dev))
        with open(graph_opt_sch_file, 'r') as f:
            graph = f.read()
        m = graph_executor.create(graph,
                                  lib['default'],
                                  dev,
                                  dump_root="/tmp/tvmdbg")
        # set input (input_name = 'x'); 'x' must be set here because of the
        # earlier layout conversion
        m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
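The `dump_root` in the original code suggests per-operator profiling was the goal; that is what the debug executor provides. A sketch, assuming a recent TVM where the factory module exposes `get_graph_json()`, `get_lib()`, and `get_params()`:

from tvm.contrib.debugger import debug_executor

# Per-op profiling sketch: unlike the plain graph executor, the debug
# executor accepts dump_root and writes per-layer timing traces there.
m_dbg = debug_executor.create(lib.get_graph_json(), lib.get_lib(), dev,
                              dump_root="/tmp/tvmdbg")
m_dbg.set_input(input_name, tvm.nd.array(x.astype(dtype)))
m_dbg.set_input(**lib.get_params())
m_dbg.run()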
Example #10
def run_timing(device, platform, model, remote=None, autotvm_log=None, batch=1, runs=3, reps=5, log=None):
    """
    Run a time trail on TVM

    :param device: The device to run this on
    :param platform: The platform get the machine learning model on
    :param model: The machine learning model to use
    :param remote: Details about the remote device
    :param autotvm_log: The path to the auto TVM file
    :param batch: The number of pictures to run in one go
    :param runs: The number of runs to run the picture through
    :param reps: The number of times the measurement should be repeated
    :param log: The output file
    """

    # Output details of run
    from cpuinfo import get_cpu_info
    from datetime import datetime

    print("\n──────────────────────────── TVMUI ────────────────────────────\n")
    log.write("TVM Time Trial\n")
    log_print(log, "Started on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    if remote is None:
        log_print(log, 'Hardware: ' + device)
        if device == 'x86':
            log_print(log, 'CPU Type: ' + get_cpu_info().get('brand_raw'))
    else:
        log_print(log, 'Remote Name: ' + remote["name"])
        log_print(log, 'Remote Device: ' + remote["type"])
        log_print(log, 'Remote Hardware: ' + remote["hardware"])
    log_print(log, 'Backend: ' + platform)
    log_print(log, 'Model: ' + model)
    log_print(log, str(batch) + " picture(s) per run")
    log_print(log, str(runs) + " run average, repeated " + str(reps) + " times.")
    if autotvm_log is None:
        log_print(log, 'AutoTVM: No\n')
    else:
        log_print(log, 'AutoTVM: Yes\n')

    # Get the model and image data
    import numpy as np
    from PIL import Image
    from tvm import relay
    import tvm
    from tvm.contrib.download import download_testdata

    print("Loading models and images...")

    pictures = get_pics(batch)
    dataset = []

    if platform == "MXNet":
        from mxnet.gluon.model_zoo.vision import get_model

        block = get_model(model, pretrained=True)

        synset_url = "".join(
            [
                "https://gist.githubusercontent.com/zhreshold/",
                "4d0b62f3d01426887599d4f7ede23ee5/raw/",
                "596b27d23537e5a1b5751d2b0481ef172f58b539/",
                "imagenet1000_clsid_to_human.txt",
            ]
        )
        synset_name = "imagenet1000_clsid_to_human.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synset = eval(f.read())

        def transform_image(image):
            image = np.array(image) - np.array([123.0, 117.0, 104.0])
            image /= np.array([58.395, 57.12, 57.375])
            image = image.transpose((2, 0, 1))
            image = image[np.newaxis, :]
            return image

        if model == 'resnet18_v1' or model == 'mobilenetv2_1.0':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((224, 224))))
            input_shape = [batch, 3, 224, 224]

        elif model == 'inceptionv3':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((299, 299))))
            input_shape = [batch, 3, 299, 299]
        else:
            raise Exception("Invalid Model")

        shape_dict = {"data": input_shape}

        mod, params = relay.frontend.from_mxnet(block, shape_dict)
        func = mod["main"]
        func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs)

    elif platform == "PyTorch":
        import torch
        import torchvision

        model = getattr(torchvision.models, model)(pretrained=True)
        model = model.eval()

        # We grab the TorchScripted model via tracing
        input_shape = [batch, 3, 224, 224]
        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(model, input_data).eval()

        synset_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_synsets.txt",
            ]
        )
        synset_name = "imagenet_synsets.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synsets = f.readlines()
        synsets = [x.strip() for x in synsets]
        splits = [line.split(" ") for line in synsets]
        key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits}

        class_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_classes.txt",
            ]
        )
        class_name = "imagenet_classes.txt"
        class_path = download_testdata(class_url, class_name, module="data")
        with open(class_path) as f:
            class_id_to_key = f.readlines()
        class_id_to_key = [x.strip() for x in class_id_to_key]

        def transform_image(image):
            from torchvision import transforms

            my_preprocess = transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            )
            img = my_preprocess(image)
            return np.expand_dims(img, 0)

        for img in pictures:
            dataset.append(transform_image(Image.open(img).resize((224, 224))))

        input_name = "data"
        shape_list = [(input_name, input_shape)]
        func, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    elif platform == "TensorFlow":
        import tensorflow as tf
        import os

        try:
            tf_compat_v1 = tf.compat.v1
        except ImportError:
            tf_compat_v1 = tf
        import tvm.relay.testing.tf as tf_testing

        # Base location for model related files.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        model_name = "classify_image_graph_def-with_shapes.pb"
        model_url = os.path.join(repo_base, model_name)

        # Image label map
        map_proto = "imagenet_2012_challenge_label_map_proto.pbtxt"
        map_proto_url = os.path.join(repo_base, map_proto)

        # Human readable text for labels
        label_map = "imagenet_synset_to_human_label_map.txt"
        label_map_url = os.path.join(repo_base, label_map)

        model_path = download_testdata(model_url, model_name, module=["tf", "InceptionV1"])
        map_proto_path = download_testdata(map_proto_url, map_proto, module="data")
        label_path = download_testdata(label_map_url, label_map, module="data")

        with tf_compat_v1.gfile.GFile(model_path, "rb") as f:
            graph_def = tf_compat_v1.GraphDef()
            graph_def.ParseFromString(f.read())
            graph = tf.import_graph_def(graph_def, name="")
            # Call the utility to import the graph definition into default graph.
            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
            # Add shapes to the graph.
            with tf_compat_v1.Session() as sess:
                graph_def = tf_testing.AddShapesToGraphDef(sess, "softmax")
        for img in pictures:
            dataset.append(np.array(Image.open(img).resize((299, 299))))
        shape_dict = {"data": [batch, 3, 299, 299]}
        dtype_dict = {"DecodeJpeg/contents": "uint8"}
        mod, params = relay.frontend.from_tensorflow(graph_def, layout=None, shape=shape_dict)
        func = mod["main"]  # the other branches set func; without this, relay.build below fails
    else:
        raise Exception('Not Supported!')

    # Build the graph
    if device == 'x86':
        target = "llvm"
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + target)
    elif device == 'Metal':
        target = "metal"
        ctx = tvm.metal(0)
        log_print(log, 'Target: ' + target)
    elif device == 'arm_cpu':
        target = tvm.target.arm_cpu(remote["type"])
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + remote["type"])
    else:
        target = device
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + device)
    log_print(log, 'Actual Model: ' + model + '\n')
    print('Making the graph...')
    if autotvm_log is not None:
        from tvm import autotvm
        log_print(log, 'Using AutoTVM file ' + autotvm_log)
        with autotvm.apply_graph_best(autotvm_log):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(func, target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(func, target, params=params)

    print("\nSetting up TVM...")
    from tvm.contrib import graph_runtime

    # Remote upload
    if remote is not None:
        from tvm import rpc
        from tvm.contrib import utils, graph_runtime as runtime
        print("Exporting graph...")
        tmp = utils.tempdir()
        lib_fname = tmp.relpath("net.tar")
        lib.export_library(lib_fname)
        print("Connecting to device...")
        remote = rpc.connect(str(remote["ip"]), int(remote["port"]))
        print("Uploading to device...")
        remote.upload(lib_fname)
        lib = remote.load_module("net.tar")
        if device == 'x86':
            ctx = remote.cpu(0)
        elif device == 'Metal':
            ctx = remote.metal(0)
        elif device == 'arm_cpu':
            ctx = remote.cpu(0)
        else:
            ctx = remote.cpu(0)
    dtype = "float32"
    m = graph_runtime.GraphModule(lib["default"](ctx))

    def run_tvm(pics, number, repeat):
        """
        Runs a single inference and gives back the time

        :param pics: The image(s) to run
        :param number: The number of times to run the inference
        :param repeat: The number of times to repeat the measurement
        :return: An array with the time and the result
        """

        # combine pictures
        arr = np.ndarray(shape=input_shape, dtype=dtype)
        p = 0
        for ip in pics:
            arr[p] = ip.astype(dtype)
            p = p + 1
        m.set_input("data", tvm.nd.array(arr))

        # Actually run the inference
        time = m.module.time_evaluator("run", ctx, number=number, repeat=repeat)()

        # Get the output
        res = []
        if platform == 'MXNet':
            for i in range(len(pics)):
                res.append(synset[np.argmax(m.get_output(0).asnumpy()[i])])
        if platform == 'PyTorch':
            # Get top-1 result for TVM
            for i in range(len(pics)):
                top1_tvm = np.argmax(m.get_output(0).asnumpy()[i])
                tvm_class_key = class_id_to_key[top1_tvm]
                res.append(key_to_classname[tvm_class_key])
        if platform == 'TensorFlow':
            pre = np.squeeze(m.get_output(0, tvm.nd.empty((1, 1008), "float32")).asnumpy())
            node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, uid_lookup_path=label_path)
            top_k = pre.argsort()[-5:][::-1]
            res = node_lookup.id_to_string(top_k[0])
        return [time, res]

    # Run the inferences
    output = []
    total = 0

    print("\nRunning inferences...")
    for i in range(int(len(dataset) / batch)):
        log_print(log, "\nSet " + str(i + 1) + ":")
        inp = []
        # Create the next batch
        for j in range(batch):
            inp.append(dataset[batch * i + j])
        # Run inference here
        output = run_tvm(inp, runs, reps)
        # Output results
        e = 0
        for rl in output[1]:
            log_print(log, "Image " + str(e + 1) + " Path: " + pictures[batch * i + e])
            log_print(log, "Image " + str(e + 1) + " ID: " + rl)
            e = e + 1
        log_print(log, "Time taken: " + str('%.2f' % (1000 * output[0].mean)) + " ms")
        total = total + output[0].mean
    ave = total / int(len(dataset) / batch)
    log_print(log, '\nAVERAGE TIME: ' + str('%.2f' % (ave * 1000)) + " ms")
    log_print(log, "Finished on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    log.close()
    return
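A hypothetical driver for `run_timing` (the argument values are illustrative; `log` can be any writable file object):

# Hypothetical invocation; "resnet18" is a valid torchvision model name.
if __name__ == "__main__":
    with open("tvm_timing.log", "w") as log_file:
        run_timing(device="x86", platform="PyTorch", model="resnet18",
                   autotvm_log=None, batch=1, runs=3, reps=5, log=log_file)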