Example #1
if env.TARGET != "sim":
    # Get remote from fleet node; tracker_host and tracker_port are
    # assumed to be defined earlier in the script (compare Examples #4 and #7).
    remote = autotvm.measure.request_remote(env.TARGET,
                                            tracker_host,
                                            int(tracker_port),
                                            timeout=10000)
    # Reconfigure the JIT runtime and FPGA.
    # You can program the FPGA with your own custom bitstream
    # by passing the path to the bitstream file instead of None.
    reconfig_start = time.time()
    vta.reconfig_runtime(remote)
    vta.program_fpga(remote, bitstream=None)
    reconfig_time = time.time() - reconfig_start
    print(
        "Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))

# In simulation mode, host the RPC server locally.
else:
    remote = rpc.LocalSession()

# Get execution context from remote
ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

####################################
# Build the inference graph runtime.
# ----------------------------------
# Load the downloaded vision model with the Darknet library and compile it with Relay.
# The compilation steps are:
#
# 1. Front end translation from Darknet into a Relay module.
# 2. Apply 8-bit quantization: here we skip the first conv layer
#    and the dense layer, which will both be executed in fp32 on the CPU.
# 3. Perform graph packing to alter the data layout for tensorization.
# 4. Perform constant folding to reduce the number of operators (e.g. eliminate batch norm multiply).
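#
# A minimal sketch of steps 1-4, for illustration only: ``net``, ``dshape``,
# ``start_pack``/``stop_pack`` and the ``graph_pack`` helper are assumed
# names that are not defined in this truncated snippet.
#
#   mod, params = relay.frontend.from_darknet(net, dtype="float32", shape=dshape)
#   with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
#       relay_prog = relay.quantize.quantize(mod["main"], params=params)
#   relay_prog = graph_pack(relay_prog, env.BATCH, env.BLOCK_OUT, env.WGT_WIDTH,
#                           start_name=start_pack, stop_name=stop_pack)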
Example #2
def test_rpc_remote_module():
    # graph
    n = tvm.runtime.convert(102)
    A = te.placeholder((n, ), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    s = te.create_schedule(B.op)

    server0 = rpc.Server(key="x0")
    server1 = rpc.Server(key="x1")

    # Connect to server0 and have it open a nested session into server1
    # through the "rpc.Connect" session constructor (RPC over RPC).
    client = rpc.connect(
        "127.0.0.1",
        server0.port,
        key="x0",
        session_constructor_args=[
            "rpc.Connect", "127.0.0.1", server1.port, "x1", False
        ],
    )

    def check_remote(remote):
        temp = utils.tempdir()
        dev = remote.cpu(0)
        f = tvm.build(s, [A, B], "llvm", name="myadd")
        path_dso = temp.relpath("dev_lib.so")
        f.export_library(path_dso)
        remote.upload(path_dso)
        f1 = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print("%g secs/op" % cost)
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)

        # Build a linked module from the uploaded tar on the remote and download it
        path_tar = temp.relpath("dev_lib.tar")
        f.export_library(path_tar)
        remote.upload(path_tar)
        local_download_path = temp.relpath("dev_lib.download.so")
        with open(local_download_path, "wb") as fo:
            fo.write(remote.download_linked_module("dev_lib.tar"))
        fupdated = tvm.runtime.load_module(local_download_path)
        a = tvm.nd.array(
            np.random.uniform(size=102).astype(A.dtype), tvm.cpu(0))
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), tvm.cpu(0))
        fupdated(a, b)
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)

    def check_minrpc():
        if tvm.get_global_func("rpc.CreatePipeClient",
                               allow_missing=True) is None:
            return
        # export with a bundled minimal RPC (minrpc) server
        temp = utils.tempdir()
        runtime = Runtime("cpp", {"system-lib": True})
        f = tvm.build(s, [A, B], "llvm", name="myadd", runtime=runtime)
        path_minrpc = temp.relpath("dev_lib.minrpc")
        f.export_library(path_minrpc, rpc.with_minrpc(cc.create_executable))

        with pytest.raises(RuntimeError):
            rpc.PopenSession("filenotexist")

        # start the minrpc session.
        remote = tvm.rpc.PopenSession(path_minrpc)
        dev = remote.cpu(0)
        f1 = remote.system_lib()

        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        time_f = f1.time_evaluator("myadd", remote.cpu(0), number=1)
        cost = time_f(a, b).mean
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)

        # make the exported file non-executable
        os.chmod(path_minrpc, stat.S_IRUSR)
        with pytest.raises(RuntimeError):
            rpc.PopenSession(path_minrpc)

    def check_remote_link_cl(remote):
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.testing.device_enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        temp = utils.tempdir()
        dev = remote.cl(0)
        s = te.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=32)
        s[B].bind(xo, te.thread_axis("blockIdx.x"))
        s[B].bind(xi, te.thread_axis("threadIdx.x"))
        f = tvm.build(s, [A, B], "opencl --host=llvm", name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        path_o = temp.relpath("myadd.o")
        path_cl = temp.relpath("myadd.cl")
        path_json = temp.relpath("myadd.tvm_meta.json")
        f.save(path_o)
        f.imported_modules[0].save(path_cl)
        remote.upload(path_o)
        remote.upload(path_cl)
        # upload meta data
        remote.upload(path_json)
        fhost = remote.load_module("myadd.o")
        fdev = remote.load_module("myadd.cl")
        fhost.import_module(fdev)
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        fhost(a, b)
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)
        # Option 2: export the library as a tarball to be handled by the remote compiler
        path_tar = temp.relpath("myadd.tar")
        f.export_library(path_tar)
        remote.upload(path_tar)
        fhost = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        fhost(a, b)
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)

    check_remote(rpc.LocalSession())
    check_remote(client)
    check_minrpc()
Example #3
def main():
    # one line to get the model
    block = get_model('resnet18_v1', pretrained=True)
    # test model
    img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
    img_name = 'cat.png'
    img_path = download_testdata(img_url, img_name, module='data')
    image = Image.open(img_path).resize((224, 224))
    # tvm specific data path
    # print(img_path)

    x = transform_image(image)

    # dict mapping label numbers to human-readable words, prepared from the synset
    synset_url = ''.join([
        'https://gist.githubusercontent.com/zhreshold/',
        '4d0b62f3d01426887599d4f7ede23ee5/raw/',
        '596b27d23537e5a1b5751d2b0481ef172f58b539/',
        'imagenet1000_clsid_to_human.txt'
    ])
    synset_name = 'imagenet1000_clsid_to_human.txt'
    synset_path = download_testdata(synset_url, synset_name, module='data')
    with open(synset_path) as f:
        synset = eval(f.read())
    # print(synset)

    # Port the Gluon model to a portable computational graph
    batch_size = 1
    num_classes = 1000
    image_shape = (3, 224, 224)
    data_shape = (batch_size, ) + image_shape

    shape_dict = {'data': x.shape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    # we want probabilities, so add a softmax operator
    func = mod["main"]
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    # compile the graph to run on a Raspberry Pi 3 Model B
    local_demo = False

    if local_demo:
        target = tvm.target.create('llvm')
    else:
        target = tvm.target.arm_cpu('rasp3b')

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    # Save the library at local temporary directory.
    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.tar')
    lib.export_library(lib_fname)

    # An RPC server must be running on the Raspberry Pi.
    # Get its IP address and connect to it to run the net compiled here with Relay.
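    # The server can be started on the Pi with the standard TVM command
    # (assuming the TVM runtime is installed there):
    #   python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090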

    # obtain an RPC session from remote device.
    if local_demo:
        remote = rpc.LocalSession()
    else:
        # The following is my environment, change this to the IP address of your target device
        host = '192.168.0.10'
        port = 9090
        remote = rpc.connect(host, port)

    # upload the library to remote device and load it
    remote.upload(lib_fname)
    rlib = remote.load_module('net.tar')

    # create the remote runtime module
    ctx = remote.cpu(0)
    module = runtime.create(graph, rlib, ctx)
    # set parameter (upload params to the remote device. This may take a while)
    module.set_input(**params)
    # set input data
    module.set_input('data', tvm.nd.array(x.astype('float32')))
    # run
    module.run()
    # get output
    out = module.get_output(0)
    # get top1 result
    top1 = np.argmax(out.asnumpy())
    print('TVM prediction top-1: {}'.format(synset[top1]))
Example #4
def tune_and_evaluate(tuning_opt):

    if env.TARGET != "sim":
        # Get remote from fleet node
        remote = autotvm.measure.request_remote(env.TARGET,
                                                tracker_host,
                                                tracker_port,
                                                timeout=10000)
        # Reconfigure the JIT runtime and FPGA.
        vta.reconfig_runtime(remote)
        vta.program_fpga(remote, bitstream=None)
    else:
        # In simulation mode, host the RPC server locally.
        remote = rpc.LocalSession()

    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Perform task extraction on Relay program
    print("Extract tasks...")
    relay_prog, params = compile_model()
    tasks = autotvm.task.extract_from_program(func=relay_prog,
                                              params=params,
                                              ops=(tvm.relay.op.nn.conv2d, ),
                                              target=target,
                                              target_host=env.target_host)

    # We should have extracted 10 convolution tasks
    assert len(tasks) == 10
    print("Extracted {} conv2d tasks:".format(len(tasks)))
    for tsk in tasks:
        print("\t{}".format(tsk))

    # We do not run the tuning in our webpage server since it takes too long.
    # Uncomment the following line to skip tuning.
    # return

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[log_file]):
        # Compile network
        print("Compile...")
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)

        # Export library
        print("Upload...")
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Generate the graph runtime
        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
        m = graph_runtime.create(graph, lib, ctx)

        # upload parameters to device
        image = tvm.nd.array(
            (np.random.uniform(size=(1, 3, 224, 224))).astype('float32'))
        m.set_input(**params)
        m.set_input('data', image)

        # evaluate
        print("Evaluate inference time cost...")
        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #5
def test_rpc_remote_module():
    if not tvm.runtime.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    client = rpc.connect(server.host, server.port)
    # graph
    n = tvm.runtime.convert(1024)
    A = te.placeholder((n, ), name='A')
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = te.create_schedule(B.op)

    def check_remote(remote):
        if not tvm.runtime.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], "llvm", name="myadd")
        path_dso = temp.relpath("dev_lib.so")
        f.export_library(path_dso)
        remote.upload(path_dso)
        f1 = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    def check_remote_link_cl(remote):
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.runtime.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        if not tvm.runtime.enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cl(0)
        s = te.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=32)
        s[B].bind(xo, te.thread_axis("blockIdx.x"))
        s[B].bind(xi, te.thread_axis("threadIdx.x"))
        f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        path_o = temp.relpath("myadd.o")
        path_cl = temp.relpath("myadd.cl")
        path_json = temp.relpath("myadd.tvm_meta.json")
        f.save(path_o)
        f.imported_modules[0].save(path_cl)
        remote.upload(path_o)
        remote.upload(path_cl)
        # upload meta data
        remote.upload(path_json)
        fhost = remote.load_module("myadd.o")
        fdev = remote.load_module("myadd.cl")
        fhost.import_module(fdev)
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
        # Option 2: export the library as a tarball to be handled by the remote compiler
        path_tar = temp.relpath("myadd.tar")
        f.export_library(path_tar)
        remote.upload(path_tar)
        fhost = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote(client)
    check_remote(rpc.LocalSession())
Example #6
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
    options: dict = None,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package: TVMCPackage
        The compiled model package object that will be run.
    device : str
        The device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session (local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of str
        execution times generated by the time evaluator
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it.")

    with ExitStack() as stack:
        # Currently only two package formats are supported: "classic" and
        # "mlf". The latter can only be used for micro targets, i.e. with microTVM.
        if device == "micro":
            if tvmc_package.type != "mlf":
                raise TVMCException(
                    f"Model {tvmc_package.package_path} is not a MLF archive.")

            project_dir = get_project_dir(tvmc_package.project_dir)

            # This is guaranteed to work since project_dir was already checked when
            # building the dynamic parser to accommodate the project options, so no
            # checks are in place when calling GeneratedProject.
            project_ = project.GeneratedProject.from_directory(
                project_dir, options)
        else:
            if tvmc_package.type == "mlf":
                raise TVMCException(
                    "You're trying to run a model saved using the Model Library Format (MLF). "
                    "MLF can only be used to run micro device ('--device micro')."
                )

        if hostname:
            if isinstance(port, str):
                port = int(port)
            # Remote RPC
            if rpc_key:
                logger.debug("Running on remote RPC tracker with key %s.",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("Running on remote RPC with no key.")
                session = rpc.connect(hostname, port)
        elif device == "micro":
            # Remote RPC (running on a micro target)
            logger.debug("Running on remote RPC (micro target).")
            try:
                session = tvm.micro.Session(project_.transport())
                stack.enter_context(session)
            except Exception as err:
                raise TVMCException(
                    "Could not open a session with the micro target.") from err
        else:
            # Local
            logger.debug("Running a local session.")
            session = rpc.LocalSession()

        # Micro targets don't support uploading a model. The model to be run
        # must already be flashed onto the micro target before one tries
        # to run it. Hence skip the model upload for micro targets.
        if device != "micro":
            session.upload(tvmc_package.lib_path)
            lib = session.load_module(tvmc_package.lib_name)

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("Device is %s.", device)
        if device == "cuda":
            dev = session.cuda()
        elif device == "cl":
            dev = session.cl()
        elif device == "metal":
            dev = session.metal()
        elif device == "vulkan":
            dev = session.vulkan()
        elif device == "rocm":
            dev = session.rocm()
        elif device == "micro":
            dev = session.device
            lib = session.get_system_lib()
        else:
            assert device == "cpu"
            dev = session.cpu()

        # TODO(gromero): Adjust for micro targets.
        if profile:
            logger.debug("Creating runtime with profiling enabled.")
            module = debug_executor.create(tvmc_package.graph,
                                           lib,
                                           dev,
                                           dump_root="./prof")
        else:
            if device == "micro":
                logger.debug(
                    "Creating runtime (micro) with profiling disabled.")
                module = tvm.micro.create_local_graph_executor(
                    tvmc_package.graph, lib, dev)
            else:
                logger.debug("Creating runtime with profiling disabled.")
                module = runtime.create(tvmc_package.graph, lib, dev)

        logger.debug("Loading params into the runtime module.")
        module.load_params(tvmc_package.params)

        logger.debug("Collecting graph input shape and type:")
        shape_dict, dtype_dict = module.get_input_info()
        logger.debug("Graph input shape: %s", shape_dict)
        logger.debug("Graph input type: %s", dtype_dict)

        inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs,
                                       fill_mode)

        logger.debug("Setting inputs to the module.")
        module.set_input(**inputs_dict)

        # Run must be called explicitly if profiling
        if profile:
            logger.info("Running the module with profiling enabled.")
            report = module.profile()
            # This print is intentional
            print(report)

        if device == "micro":
            # TODO(gromero): Fix time_evaluator() for micro targets. Once it's
            # fixed module.benchmark() can be used instead and this if/else can
            # be removed.
            module.run()
            times = []
        else:
            # call the benchmarking function of the executor
            times = module.benchmark(dev, number=number, repeat=repeat)

        logger.debug("Collecting the output tensors.")
        num_outputs = module.get_num_outputs()
        outputs = {}
        for i in range(num_outputs):
            output_name = "output_{}".format(i)
            outputs[output_name] = module.get_output(i).numpy()

        return TVMCResult(outputs, times)
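
A hypothetical invocation of this function, as a sketch only: the package path "my_model.tar" is illustrative, and TVMCPackage / run_module are assumed to be importable from the TVMC driver.

    package = TVMCPackage(package_path="my_model.tar")
    result = run_module(package, device="cpu", fill_mode="zeros")
    print(result.outputs["output_0"])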
Example #7
def tune_and_evaluate(tuning_opt):

    if env.TARGET != "sim":
        # Get remote from fleet node
        remote = autotvm.measure.request_remote(
            env.TARGET, tracker_host, tracker_port, timeout=10000
        )
        # Reconfigure the JIT runtime and FPGA.
        vta.reconfig_runtime(remote)
        vta.program_fpga(remote, bitstream=None)
    else:
        # In simulation mode, host the RPC server locally.
        remote = rpc.LocalSession()

    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Perform task extraction on Relay program
    print("Extract tasks...")
    relay_prog, params = compile_network(env, target, network, start_pack, stop_pack)
    mod = tvm.IRModule.from_expr(relay_prog)
    tasks = autotvm.task.extract_from_program(
        mod,
        params=params,
        ops=(relay.op.get("nn.conv2d"),),
        target=target,
        target_host=env.target_host,
    )

    # filter out non-packed conv2d tasks: after graph packing, the conv2d
    # input has more than 4 dimensions (e.g. NCHWnc layout)
    tasks = list(filter(lambda t: len(t.args[0][1]) > 4, tasks))

    # We should have extracted 10 convolution tasks
    assert len(tasks) == 10
    print("Extracted {} conv2d tasks:".format(len(tasks)))
    for tsk in tasks:
        inp = tsk.args[0][1]
        wgt = tsk.args[1][1]
        batch = inp[0] * inp[4]
        in_filter = inp[1] * inp[5]
        out_filter = wgt[0] * wgt[4]
        height, width = inp[2], inp[3]
        hkernel, wkernel = wgt[2], wgt[3]
        hstride, wstride = tsk.args[2][0], tsk.args[2][1]
        hpad, wpad = tsk.args[3][0], tsk.args[3][1]
        print(
            "({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
                batch,
                height,
                width,
                in_filter,
                out_filter,
                hkernel,
                wkernel,
                hpad,
                wpad,
                hstride,
                wstride,
            )
        )

    # We do not run the tuning in our webpage server since it takes too long.
    # Comment out the following line to run the tuning yourself.
    return

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[log_file]):
        # Compile network
        print("Compile...")
        if target.device_name != "vta":
            with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
                lib = relay.build(
                    relay_prog, target=target, params=params, target_host=env.target_host
                )
        else:
            with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
                lib = relay.build(
                    relay_prog, target=target, params=params, target_host=env.target_host
                )

        # Export library
        print("Upload...")
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Generate the graph runtime
        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
        m = graph_runtime.GraphModule(lib["default"](ctx))

        # upload parameters to device
        image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
        m.set_input("data", image)

        # evaluate
        print("Evaluate inference time cost...")
        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print(
            "Mean inference time (std dev): %.2f ms (%.2f ms)"
            % (np.mean(prof_res), np.std(prof_res))
        )
Example #8
def run_module(
    module_file,
    device,
    hostname=None,
    port=9090,
    rpc_key=None,
    inputs=None,
    fill_mode="random",
    repeat=1,
    profile=False,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    module_file : str
        The path to the module file (a .tar file).
    device : str
        The device (e.g. "cpu" or "gpu") to be targeted by the RPC
        session (local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of str
        execution times generated by the time evaluator
    """

    with tempfile.TemporaryDirectory() as tmp_dir:
        logger.debug("extracting module file %s", module_file)
        t = tarfile.open(module_file)
        t.extractall(tmp_dir)
        graph = open(os.path.join(tmp_dir, "mod.json")).read()
        params = bytearray(
            open(os.path.join(tmp_dir, "mod.params"), "rb").read())

        if hostname:
            # Remote RPC
            if rpc_key:
                logger.debug("running on remote RPC tracker with key %s",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("running on remote RPC with no key")
                session = rpc.connect(hostname, port)
        else:
            # Local
            logger.debug("running a local session")
            session = rpc.LocalSession()

        session.upload(os.path.join(tmp_dir, "mod.so"))
        lib = session.load_module("mod.so")

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("device is %s", device)
        if device == "gpu":
            dev = session.gpu()
        elif device == "cl":
            dev = session.cl()
        else:
            assert device == "cpu"
            dev = session.cpu()

        if profile:
            logger.debug("creating runtime with profiling enabled")
            module = debug_executor.create(graph, lib, dev, dump_root="./prof")
        else:
            logger.debug("creating runtime with profiling disabled")
            module = runtime.create(graph, lib, dev)

        logger.debug("load params into the runtime module")
        module.load_params(params)

        shape_dict, dtype_dict = get_input_info(graph, params)
        inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs,
                                       fill_mode)

        logger.debug("setting inputs to the module")
        module.set_input(**inputs_dict)

        # Run must be called explicitly if profiling
        if profile:
            logger.debug("running the module with profiling enabled")
            module.run()

        # create the module time evaluator (returns a function)
        timer = module.module.time_evaluator("run", dev, 1, repeat=repeat)
        # call the evaluator function to invoke the module and save execution times
        prof_result = timer()
        # collect a list of execution times from the profiling results
        times = prof_result.results

        logger.debug("collecting the output tensors")
        num_outputs = module.get_num_outputs()
        outputs = {}
        for i in range(num_outputs):
            output_name = "output_{}".format(i)
            outputs[output_name] = module.get_output(i).asnumpy()

        return outputs, times
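
The make_inputs_dict helper used above is not defined in this snippet. A minimal sketch of what such a helper might look like, assuming the fill modes described in the docstring ("zeros", "ones", "random"):

    import numpy as np

    def make_inputs_dict(shape_dict, dtype_dict, inputs=None, fill_mode="random"):
        # Fill every graph input that the caller did not supply explicitly.
        inputs = inputs or {}
        inputs_dict = {}
        for name, shape in shape_dict.items():
            if name in inputs:
                inputs_dict[name] = inputs[name]
            elif fill_mode == "zeros":
                inputs_dict[name] = np.zeros(shape, dtype=dtype_dict[name])
            elif fill_mode == "ones":
                inputs_dict[name] = np.ones(shape, dtype=dtype_dict[name])
            else:  # "random"
                inputs_dict[name] = np.random.uniform(size=shape).astype(dtype_dict[name])
        return inputs_dict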
Example #9
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package: TVMCPackage
        The compiled model package object that will be run.
    device : str
        The device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session (local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of str
        execution times generated by the time evaluator
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it."
        )

    # Currently only two package formats are supported: "classic" and
    # "mlf". The latter can only be used for micro targets, i.e. with µTVM.
    if tvmc_package.type == "mlf":
        raise TVMCException(
            "You're trying to run a model saved using the Model Library Format (MLF). "
            "MLF can only be used to run micro targets (µTVM)."
        )

    if hostname:
        if isinstance(port, str):
            port = int(port)
        # Remote RPC
        if rpc_key:
            logger.debug("Running on remote RPC tracker with key %s.", rpc_key)
            session = request_remote(rpc_key, hostname, port, timeout=1000)
        else:
            logger.debug("Running on remote RPC with no key.")
            session = rpc.connect(hostname, port)
    else:
        # Local
        logger.debug("Running a local session.")
        session = rpc.LocalSession()

    session.upload(tvmc_package.lib_path)
    lib = session.load_module(tvmc_package.lib_name)

    # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
    logger.debug("Device is %s.", device)
    if device == "cuda":
        dev = session.cuda()
    elif device == "cl":
        dev = session.cl()
    else:
        assert device == "cpu"
        dev = session.cpu()

    if profile:
        logger.debug("Creating runtime with profiling enabled.")
        module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof")
    else:
        logger.debug("Creating runtime with profiling disabled.")
        module = runtime.create(tvmc_package.graph, lib, dev)

    logger.debug("Loading params into the runtime module.")
    module.load_params(tvmc_package.params)

    shape_dict, dtype_dict = get_input_info(tvmc_package.graph, tvmc_package.params)
    inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs, fill_mode)

    logger.debug("Setting inputs to the module.")
    module.set_input(**inputs_dict)

    # Run must be called explicitly if profiling
    if profile:
        logger.info("Running the module with profiling enabled.")
        module.run()

    # create the module time evaluator (returns a function)
    timer = module.module.time_evaluator("run", dev, number=number, repeat=repeat)
    # call the evaluator function to invoke the module and save execution times
    prof_result = timer()
    # collect a list of execution times from the profiling results
    times = prof_result.results

    logger.debug("Collecting the output tensors.")
    num_outputs = module.get_num_outputs()
    outputs = {}
    for i in range(num_outputs):
        output_name = "output_{}".format(i)
        outputs[output_name] = module.get_output(i).numpy()

    return TVMCResult(outputs, times)
Example #10
    def __init__(self, *args):
        # Parse arguments
        print("\nML element I/O spec: \n", args, '\n')
        model_path = args[0]
        input_shapes = util.shapes_str_to_npshapes(args[1])
        input_types = util.datatypes_str_to_nptypes(args[2])
        output_shapes = util.shapes_str_to_npshapes(args[3])
        output_types = util.datatypes_str_to_nptypes(args[4])
        input_names = util.names_str_to_strarray(args[5])
        output_names = util.names_str_to_strarray(args[6])
        self.input_shapes = input_shapes
        for input_type in input_types:
            if input_type is None:
                print("Invalid input_type")
                return None
        for output_type in output_types:
            if output_type is None:
                print("Invalid output_type")
                return None
        if (len(input_shapes) > 4 or len(input_types) > 4
                or len(input_names) > 4
                or len(input_shapes) != len(input_types)
                or len(input_shapes) != len(input_names)):
            print("Invalid input count: (%d,%d,%d)".format(
                len(input_shapes), len(input_types), len(input_names)))
            return None
        if (len(output_shapes) > 4 or len(output_types) > 4
                or len(output_names) > 4
                or len(output_shapes) != len(output_types)
                or len(output_shapes) != len(output_names)):
            print("Invalid output count: (%d,%d,%d)".format(
                len(output_shapes), len(output_types), len(output_names)))
            return None
        self.input_dims = []
        self.output_dims = []
        self.input_types = input_types
        self.output_types = output_types
        for i, input_shape in enumerate(input_shapes):
            input_dim = nns.TensorShape(input_shape, input_types[i])
            self.input_dims.append(input_dim)
        for i, output_shape in enumerate(output_shapes):
            output_dim = nns.TensorShape(output_shape, output_types[i])
            self.output_dims.append(output_dim)
        self.input_names = input_names
        self.output_names = output_names

        # Initialize TVM runtime session with given binary
        session = rpc.LocalSession()
        session.upload(os.path.join(model_path, "mod.so"))
        lib = session.load_module("mod.so")
        ctx = session.cpu()  # TODO: Hardcoded CPU backend

        # Load graph and create a module
        self.graph = open(os.path.join(model_path, "mod.json")).read()
        self.module = runtime.create(self.graph, lib, ctx)
        self.ctx = ctx

        # Load params
        self.params = bytearray(
            open(os.path.join(model_path, "mod.params"), "rb").read())
        self.module.load_params(self.params)
Example #11
def main(model,
         start_pack,
         stop_pack,
         data_shape=(1, 3, 224, 224),
         dtype='float32'):
    # Make sure that TVM was compiled with RPC=1
    assert tvm.module.enabled("rpc")

    ######################################################################
    # Define the platform and model targets
    # -------------------------------------
    # Execute on CPU vs. VTA, and define the model.

    # Load VTA parameters from the vta/config/vta_config.json file
    env = vta.get_env()

    # Set ``device=arm_cpu`` to run inference on the CPU
    # or ``device=vta`` to run inference on the FPGA.
    device = "vta"
    target = env.target if device == "vta" else env.target_vta_cpu

    # Name of Gluon model to compile
    # The ``start_pack`` and ``stop_pack`` labels indicate where
    # to start and end the graph packing relay pass: in other words
    # where to start and finish offloading to VTA.

    ######################################################################
    # Obtain an execution remote
    # ---------------------------------
    # When the target is 'sim' or 'tsim', execute locally;
    # any other target is rejected below.
    print(f"Target is {env.TARGET}")
    if env.TARGET in ["sim", "tsim"]:
        remote = rpc.LocalSession()
    else:
        print(f"Error, incorrect target for benchmarking: {env.TARGET}")

    # Get execution context from remote
    ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

    ######################################################################
    # Build the inference graph runtime
    # ---------------------------------
    # Grab ResNet-18 model from Gluon model zoo and compile with Relay.
    # The compilation steps are:
    #    1) Front end translation from MxNet into Relay module.
    #    2) Apply 8-bit quantization: here we skip the first conv layer,
    #       and dense layer which will both be executed in fp32 on the CPU.
    #    3) Perform graph packing to alter the data layout for tensorization.
    #    4) Perform constant folding to reduce number of operators (e.g. eliminate
    #       batch norm multiply).
    #    5) Perform relay build to object file.
    #    6) Load the object file onto remote (FPGA device).
    #    7) Generate graph runtime, `m`.

    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(target):

        # Populate the shape and data type dictionary for ResNet input
        dtype_dict = {"data": 'float32'}
        shape_dict = {"data": data_shape}

        # Measure build start time
        build_start = time.time()

        # Start front end compilation
        if model == 'resnet':
            mod, params = test_resnet_mxnet(env)
        elif model == 'yolo':
            mod, params = test_yolo_darknet()
        elif model == 'lenet':
            mod, params = lenet()
        elif model == 'mobilenet':
            mod, params = mobilenet()
        else:
            print(f"Error, incorrect model name: {model}")

        ### Need to bind params

        # Update shape and type dictionary
        shape_dict.update({k: v.shape for k, v in params.items()})
        dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod['main'], params=params)

        print(f"Finishing quantizing graph")
        # Perform graph packing and constant folding for VTA target
        if target.device_name == "vta":
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(relay_prog,
                                    env.BATCH,
                                    env.BLOCK_OUT,
                                    env.WGT_WIDTH,
                                    start_name=start_pack,
                                    stop_name=stop_pack)

        print(f"Finishing packing graph")

        # Compile Relay program with AlterOpLayout disabled
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)

        # Measure Relay build time
        build_time = time.time() - build_start
        print(model + " inference graph built in {0:.2f}s!".format(build_time))

        # Send the inference library over to the remote RPC server
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Graph runtime
        m = graph_runtime.create(graph, lib, ctx)
    # Set the network parameters and inputs
    data = np.random.uniform(size=data_shape).astype(dtype)

    m.set_input(**params)
    m.set_input('data', tvm.nd.array(data.astype(dtype)))

    # Perform inference and gather execution statistics
    # More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
    num = 1  # number of times we run module for a single measurement
    rep = 1  # number of measurements (we derive std dev from this)
    timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

    if env.TARGET in ["sim", "tsim"]:
        simulator.clear_stats()
        timer()
        sim_stats = simulator.stats()
        print("\nExecution statistics:")
        for k, v in sim_stats.items():
            # Since we execute the workload many times, we need to normalize stats
            # Note that there is always one warm up run
            # Therefore we divide the overall stats by (num * rep + 1)
            print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
    else:
        tcost = timer()
        std = np.std(tcost.results) * 1000
        mean = tcost.mean * 1000
        print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" %
              (mean, std, env.BATCH))
        print("Average per sample inference time: %.2fms" % (mean / env.BATCH))