Esempio n. 1
1
def test_papi(target, dev):
    target = tvm.target.Target(target)
    if str(target.kind) == "llvm":
        metric = "PAPI_FP_OPS"
    elif str(target.kind) == "cuda":
        metric = "cuda:::event:shared_load:device=0"
    else:
        pytest.skip(f"Target {target.kind} not supported by this test")
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"),
                        device=dev)
    report = vm.profile(
        [data],
        func_name="main",
        collectors=[
            tvm.runtime.profiling.PAPIMetricCollector({dev: [metric]})
        ],
    )
    assert metric in str(report)

    csv = read_csv(report)
    assert metric in csv.keys()
    assert any([float(x) > 0 for x in csv[metric]])
Esempio n. 2
0
def test_vm(target, dev):
    dtype = "float32"
    x = relay.var("x", shape=(relay.Any(), relay.Any()), dtype=dtype)
    y = relay.var("y", shape=(relay.Any(), relay.Any()), dtype=dtype)
    mod = tvm.IRModule()
    mod["main"] = relay.Function([x, y], relay.add(x, y))
    exe = relay.vm.compile(mod, target)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(28, 28).astype("float32")
    report = vm.profile(data, data, func_name="main")
    assert "fused_add" in str(report)
    assert "Total" in str(report)
    assert "AllocTensorReg" in str(report)
    assert "AllocStorage" in str(report)

    csv = read_csv(report)
    assert "Hash" in csv.keys()
    # Ops should have a duration greater than zero.
    assert all([
        float(dur) > 0 for dur, name in zip(csv["Duration (us)"], csv["Name"])
        if name[:5] == "fused"
    ])
    # AllocTensor or AllocStorage may be cached, so their duration could be 0.
    assert all([
        float(dur) >= 0 for dur, name in zip(csv["Duration (us)"], csv["Name"])
        if name[:5] != "fused"
    ])
Esempio n. 3
0
def test_vm(target, dev):
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")
    assert "fused_nn_softmax" in report
    assert "Total time" in report
def test_basic(dev, target):
    mod, params = mlp.get_workload(batch_size=1)
    if not profiler_vm.enabled():
        return

    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    res = vm.profile(tvm.nd.array(data), func_name="main")
    assert "softmax" in str(res)
Esempio n. 5
0
def test_report_serialization():
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, "llvm", params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, tvm.cpu())

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")

    report2 = Report.from_json(report.json())
    # equality on reports compares pointers, so we compare the printed results instead.
    assert str(report) == str(report2)
Esempio n. 6
0
def test_basic():
    mod, params = resnet.get_workload()
    if not profiler_vm.enabled():
        return

    for target, dev in enabled_targets():
        exe = relay.vm.compile(mod, target, params=params)
        vm = profiler_vm.VirtualMachineProfiler(exe, dev)

        data = np.random.rand(1, 3, 224, 224).astype("float32")
        res = vm.invoke("main", [data])
        print("\n{}".format(vm.get_stat()))
        print("\n{}".format(vm.get_stat(False)))
Esempio n. 7
0
def test_vm_reshape_and_copy():
    target = "llvm"
    dev = tvm.gpu()
    x_np = np.random.uniform(size=(8, 16)).astype("float32")
    x = relay.var("x", shape=(8, 16), dtype="float32")
    y = relay.reshape(x, [-1, 4, 8])
    mod = tvm.IRModule()
    mod["main"] = relay.Function([x], y)
    with tvm.transform.PassContext(opt_level=3):
        exec = relay.vm.compile(mod, "llvm")
    assert "reshape_tensor" in exec.bytecode
    vm = profiler_vm.VirtualMachineProfiler(exec, dev)
    vm.profile(tvm.nd.array(x_np))
Esempio n. 8
0
def test_basic():
    mod, params = resnet.get_workload()
    target = 'llvm'
    ctx = tvm.cpu()
    if not profiler_vm.enabled():
        return
    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, ctx)

    data = np.random.rand(1, 3, 224, 224).astype('float32')
    res = vm.invoke("main", [data])
    print("\n{}".format(vm.get_stat()))
    print("\n{}".format(vm.get_stat(False)))
Esempio n. 9
0
def test_vm(target, dev):
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")
    assert "fused_nn_softmax" in str(report)
    assert "Total" in str(report)

    csv = read_csv(report)
    assert "Hash" in csv.keys()
    assert all([float(x) > 0 for x in csv["Duration (us)"]])
def test_rpc_vm():
    server = rpc.Server(key="profiling")
    remote = rpc.connect("127.0.0.1", server.port, key="profiling")

    mod, params = mlp.get_workload(1)
    exe = relay.vm.compile(mod, "llvm", params=params)
    temp = utils.tempdir()
    path = temp.relpath("lib.tar")
    exe.mod.export_library(path)
    remote.upload(path)
    rexec = remote.load_module("lib.tar")
    vm = profiler_vm.VirtualMachineProfiler(rexec, remote.cpu())
    report = vm.profile(tvm.nd.array(np.ones((1, 1, 28, 28), dtype="float32"), device=remote.cpu()))
    assert len(report.calls) > 0
Esempio n. 11
0
def test_vm(target, dev):
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, target, params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")
    assert "fused_nn_softmax" in str(report)
    assert "Total" in str(report)

    f = StringIO(report.csv())
    reader = csv.reader(f, delimiter=",")
    # force parsing
    for row in reader:
        pass
Esempio n. 12
0
def test_json():
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, "llvm", params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, tvm.cpu())

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")
    parsed = json.loads(report.json())
    assert "device_metrics" in parsed
    assert "calls" in parsed
    assert "Duration (us)" in parsed["calls"][0]
    assert "microseconds" in parsed["calls"][0]["Duration (us)"]
    assert len(parsed["calls"]) > 0
    for call in parsed["calls"]:
        assert isinstance(call["Name"]["string"], str)
        assert isinstance(call["Count"]["count"], int)
        assert isinstance(call["Duration (us)"]["microseconds"], float)
Esempio n. 13
0
def test_vm(target, dev):
    dtype = "float32"
    x = relay.var("x", shape=(relay.Any(), relay.Any()), dtype=dtype)
    y = relay.var("y", shape=(relay.Any(), relay.Any()), dtype=dtype)
    mod = tvm.IRModule()
    mod["main"] = relay.Function([x, y], relay.add(x, y))
    exe = relay.vm.compile(mod, target)
    vm = profiler_vm.VirtualMachineProfiler(exe, dev)

    data = np.random.rand(28, 28).astype("float32")
    report = vm.profile(data, data, func_name="main")
    assert "fused_add" in str(report)
    assert "Total" in str(report)
    assert "AllocTensorReg" in str(report)
    assert "AllocStorage" in str(report)

    csv = read_csv(report)
    assert "Hash" in csv.keys()
    assert all([float(x) > 0 for x in csv["Duration (us)"]])
Esempio n. 14
0
def test_report_serialization():
    mod, params = mlp.get_workload(1)

    exe = relay.vm.compile(mod, "llvm", params=params)
    vm = profiler_vm.VirtualMachineProfiler(exe, tvm.cpu())

    data = np.random.rand(1, 1, 28, 28).astype("float32")
    report = vm.profile(data, func_name="main")

    report2 = Report.from_json(report.json())
    # Equality on reports compares pointers, so we compare the printed
    # results instead.

    # Use .table() instead of str(), because str() includes aggregate
    # and column summations whose values may be impacted by otherwise
    # negligible conversion errors. (2 occurrences / 3000 trials)
    assert report.table(aggregate=False,
                        col_sums=False) == report2.table(aggregate=False,
                                                         col_sums=False)
Esempio n. 15
0
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
    end_to_end: bool = False,
    options: dict = None,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package: TVMCPackage
        The compiled model package object that will be run.
    device: str,
        the device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session, local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug executor.
    end_to_end : bool
        Whether to measure the time of memory copies as well as model
        execution. Turning this on can provide a more realistic estimate
        of how long running the model in production would take.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of str
        execution times generated by the time evaluator
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it.")

    with ExitStack() as stack:
        # Currently only two package formats are supported: "classic" and
        # "mlf". The later can only be used for micro targets, i.e. with microTVM.
        if device == "micro":
            if tvmc_package.type != "mlf":
                raise TVMCException(
                    f"Model {tvmc_package.package_path} is not a MLF archive.")

            project_dir = get_project_dir(tvmc_package.project_dir)

            # This is guaranteed to work since project_dir was already checked when
            # building the dynamic parser to accommodate the project options, so no
            # checks are in place when calling GeneratedProject.
            project_ = project.GeneratedProject.from_directory(
                project_dir, options)
        else:
            if tvmc_package.type == "mlf":
                raise TVMCException(
                    "You're trying to run a model saved using the Model Library Format (MLF). "
                    "MLF can only be used to run micro device ('--device micro')."
                )

        if hostname:
            if isinstance(port, str):
                port = int(port)
            # Remote RPC
            if rpc_key:
                logger.debug("Running on remote RPC tracker with key %s.",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("Running on remote RPC with no key.")
                session = rpc.connect(hostname, port)
        elif device == "micro":
            # Remote RPC (running on a micro target)
            logger.debug("Running on remote RPC (micro target).")
            try:
                session = tvm.micro.Session(project_.transport())
                stack.enter_context(session)
            except:
                raise TVMCException(
                    "Could not open a session with the micro target.")
        else:
            # Local
            logger.debug("Running a local session.")
            session = rpc.LocalSession()

        # Micro targets don't support uploading a model. The model to be run
        # must be already flashed into the micro target before one tries
        # to run it. Hence skip model upload for micro targets.
        if device != "micro":
            session.upload(tvmc_package.lib_path)
            lib = session.load_module(tvmc_package.lib_name)

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("Device is %s.", device)
        if device == "cuda":
            dev = session.cuda()
        elif device == "cl":
            dev = session.cl()
        elif device == "metal":
            dev = session.metal()
        elif device == "vulkan":
            dev = session.vulkan()
        elif device == "rocm":
            dev = session.rocm()
        elif device == "micro":
            dev = session.device
            lib = session.get_system_lib()
        else:
            assert device == "cpu"
            dev = session.cpu()

        if tvmc_package.type == "vm":
            assert inputs is not None, "vm runner requires inputs to be provided as a dict"

            input_tensor = {}
            for e, i in inputs.items():
                input_tensor[e] = tvm.nd.array(i, dev)

            if profile:
                logger.debug("Creating vm with profile enabled.")
                exe = profiler_vm.VirtualMachineProfiler(lib, dev)
                res = exe.profile(**input_tensor, func_name="main")
                # This print is intentional
                print(res)
            else:
                exe = vm.VirtualMachine(lib, dev)

            exe_outputs = exe.invoke("main", **input_tensor)
            times = exe.benchmark(
                dev,
                **input_tensor,
                func_name="main",
                repeat=repeat,
                number=number,
                end_to_end=end_to_end,
            )

            # Special handling if the output only has a single value
            if not isinstance(exe_outputs, list):
                exe_outputs = [exe_outputs]

            outputs = {}
            for i, val in enumerate(exe_outputs):
                output_name = "output_{}".format(i)
                outputs[output_name] = val.numpy()
        else:
            # TODO(gromero): Adjust for micro targets.
            if profile:
                logger.debug("Creating runtime with profiling enabled.")
                module = debug_executor.create(tvmc_package.graph,
                                               lib,
                                               dev,
                                               dump_root="./prof")
            else:
                if device == "micro":
                    logger.debug(
                        "Creating runtime (micro) with profiling disabled.")
                    module = tvm.micro.create_local_graph_executor(
                        tvmc_package.graph, lib, dev)
                else:
                    logger.debug("Creating runtime with profiling disabled.")
                    module = executor.create(tvmc_package.graph, lib, dev)

            logger.debug("Loading params into the runtime module.")
            module.load_params(tvmc_package.params)

            logger.debug("Collecting graph input shape and type:")
            shape_dict, dtype_dict = module.get_input_info()
            logger.debug("Graph input shape: %s", shape_dict)
            logger.debug("Graph input type: %s", dtype_dict)

            inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs,
                                           fill_mode)

            logger.debug("Setting inputs to the module.")
            module.set_input(**inputs_dict)

            # Run must be called explicitly if profiling
            if profile:
                logger.info("Running the module with profiling enabled.")
                report = module.profile()
                # This print is intentional
                print(report)

            if device == "micro":
                # TODO(gromero): Fix time_evaluator() for micro targets. Once it's
                # fixed module.benchmark() can be used instead and this if/else can
                # be removed.
                module.run()
                times = []
            else:
                # Call the benchmarking function of the executor.
                # Optionally measure e2e data transfers from the
                # CPU to device memory overheads (e.g. PCIE
                # overheads if the device is a discrete GPU).
                if end_to_end:
                    dev = session.cpu()
                times = module.benchmark(dev,
                                         number=number,
                                         repeat=repeat,
                                         end_to_end=end_to_end)

            logger.debug("Collecting the output tensors.")
            num_outputs = module.get_num_outputs()
            outputs = {}
            for i in range(num_outputs):
                output_name = "output_{}".format(i)
                outputs[output_name] = module.get_output(i).numpy()

        return TVMCResult(outputs, times)