Example 1
def _make_virtual_device(device):
    if isinstance(device, _Device):
        return target.make_virtual_device(device)
    if isinstance(device, str):
        return target.make_virtual_device(_nd.device(device))
    raise ValueError("expecting a Device or device name, but received a %s" %
                     (type(device)))
Example 2
def _device_to_int(device):
    if isinstance(device, _Device):
        return device.device_type
    if isinstance(device, str):
        return _nd.device(device).device_type
    raise ValueError("expecting a Device or device name, but received a %s" %
                     (type(device)))
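
Both helpers above normalize either a Device object or a device-name string into an integer device type. A minimal standalone sketch of that mapping, assuming tvm.device behaves like the _nd.device call used here and the standard DLDevice codes (cpu = 1, cuda = 2):

import tvm

# Constructing a Device object does not require the backend to be enabled,
# so this also runs on a CPU-only build.
print(tvm.device("cpu").device_type)    # 1
print(tvm.device("cuda").device_type)   # 2
print(tvm.device("cuda", 1).device_id)  # 1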
Example 3
def device_copy(data, src_dev, dst_dev):
    """Copy data from the source device to the destination device. This
    operator helps data transferring between difference devices for
    heterogeneous execution.

    Parameters
    ----------
    data : tvm.relay.Expr
        The tensor to be copied.

    src_dev : Union[:py:class:`Device`, str]
        The source device where the data is copied from.

    dst_dev : Union[:py:class:`Device`, str]
        The destination device where the data is copied to.

    Returns
    -------
    result : tvm.relay.Expr
        The copied result.
    """
    if isinstance(src_dev, _Device):
        src_dev = src_dev.device_type
    elif isinstance(src_dev, str):
        src_dev = _nd.device(src_dev).device_type
    else:
        raise ValueError(
            "src_dev is expected to be the type of Device or "
            "str, but received %s" % (type(src_dev))
        )

    if isinstance(dst_dev, _Device):
        dst_dev = dst_dev.device_type
    elif isinstance(dst_dev, str):
        dst_dev = _nd.device(dst_dev).device_type
    else:
        raise ValueError(
            "dst_dev is expected to be the type of Device or "
            "str, but received %s" % (type(dst_dev))
        )
    return _make.device_copy(data, src_dev, dst_dev)
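
A minimal usage sketch of device_copy, assuming it is importable as tvm.relay.op.device_copy (the exact public path can vary across TVM versions); the variable names are illustrative:

import tvm
from tvm import relay
from tvm.relay.op import device_copy  # import path assumed

x = relay.var("x", shape=(4,), dtype="float32")
# "cpu" and "cuda" are resolved to integer device types by the branches above.
copied = device_copy(x, "cpu", "cuda")
print(copied)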
Example 4
    def device(self, dev_type, dev_id=0):
        """Construct a remote device.

        Parameters
        ----------
    dev_type: int or str
        The device type of the remote device.

    dev_id: int, optional
        The device id on the remote device, defaults to 0.

        Returns
        -------
        dev: Device
            The corresponding encoded remote device.
        """
        dev = nd.device(dev_type, dev_id)
        encode = (self._tbl_index + 1) * base.RPC_SESS_MASK
        dev.device_type += encode
        dev._rpc_sess = self
        return dev
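
The returned remote device is an ordinary Device whose device_type is shifted by a per-session offset, so the runtime can route calls back through the RPC session (a typical call site is dev = sess.device("cuda", 0) on a session obtained from tvm.rpc.connect). A small arithmetic sketch of the encoding, assuming RPC_SESS_MASK is 128, its value in tvm/rpc/base.py, and that this is the first session in the table:

RPC_SESS_MASK = 128      # assumed value of base.RPC_SESS_MASK
tbl_index = 0            # first registered session
cuda_device_type = 2     # DLDevice code for CUDA

encode = (tbl_index + 1) * RPC_SESS_MASK
remote_device_type = cuda_device_type + encode
print(remote_device_type)  # 130: decodes to session 0, local device type 2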
Example 5
def on_device(data, device):
    """Annotate an expression with a certain device type.

    Parameters
    ----------
    data : tvm.relay.Expr
        The expression to be annotated.

    device : Union[:py:class:`Device`, str]
        The device type to annotate.

    Returns
    -------
    result : tvm.relay.Expr
        The annotated expression.
    """
    if isinstance(device, _Device):
        device = device.device_type
    elif isinstance(device, str):
        device = _nd.device(device).device_type
    else:
        raise ValueError("device is expected to be the type of Device or "
                         "str, but received %s" % (type(device)))
    return _make.on_device(data, device)
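
A minimal usage sketch of on_device, assuming it is importable as tvm.relay.op.annotation.on_device (the path and accepted arguments can vary across TVM versions):

import tvm
from tvm import relay
from tvm.relay.op.annotation import on_device  # import path assumed

x = relay.var("x", shape=(2, 2), dtype="float32")
y = relay.add(x, x)
# Pin the addition to the CUDA device; the string is converted to a device
# type exactly as in the branches above.
annotated = on_device(y, "cuda")
print(annotated)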
Example 6
def build(inputs,
          args=None,
          target=None,
          target_host=None,
          name="default_function",
          binds=None):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : tvm.te.Schedule, IRModule, or dict of target to IRModule
        The input to be built.

    args : list of Buffer or Tensor or Var, optional
        The argument lists to the function.

    target : str or :any:`tvm.target.Target`, optional
        The target and option of the compilation.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is a device target.
        When TVM compiles a device-specific program such as CUDA,
        we also need host (CPU) side code to interact with the driver
        and to set up the dimensions and parameters correctly.
        target_host is used to specify the host-side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    name : str, optional
        The name of result function.

    binds : dict, optional
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    --------
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:
    1. it is an IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.te.create_schedule(C.op)
        m = tvm.lower(s, [A, B, C], name="test_add")
        rt_mod = tvm.build(m, target="llvm")

    2. it is a dict of compilation target to IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.te.create_schedule(C.op)
        with tvm.target.cuda() as cuda_tgt:
          s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
          m1 = tvm.lower(s1, [A, B, C], name="test_add1")
          m2 = tvm.lower(s2, [A, B, C], name="test_add2")
          rt_mod = tvm.build({"llvm": m1, "cuda": m2}, target_host="llvm")

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        input_mod = lower(inputs, args, name=name, binds=binds)
    elif isinstance(inputs, (list, tuple, container.Array)):
        merged_mod = tvm.IRModule({})
        for x in inputs:
            merged_mod.update(x)
        input_mod = merged_mod
    elif isinstance(inputs, tvm.IRModule):
        input_mod = inputs
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError(
            f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
            f"but got {type(inputs)}.")

    if not isinstance(inputs, (dict, container.Map)):
        target = Target.current() if target is None else target
        target = target if target else "llvm"
        target_input_mod = {target: input_mod}
    else:
        target_input_mod = inputs

    for tar, mod in target_input_mod.items():
        if not isinstance(tar, (str, Target)):
            raise ValueError("The key of inputs must be str or "
                             "Target when inputs is dict.")
        if not isinstance(mod, tvm.IRModule):
            raise ValueError("inputs must be Schedule, IRModule,"
                             "or dict of str to IRModule.")

    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host)

    if not target_host:
        for tar, mod in target_input_mod.items():
            tar = Target(tar)
            device_type = ndarray.device(tar.kind.name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm"

    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host)

    mod_host_all = tvm.IRModule({})

    device_modules = []
    for tar, input_mod in target_input_mod.items():
        mod_host, mdev = _build_for_device(input_mod, tar, target_host)
        mod_host_all.update(mod_host)
        device_modules.append(mdev)

    # Generate a unified host module.
    rt_mod_host = codegen.build_module(mod_host_all, target_host)

    # Import all modules.
    for mdev in device_modules:
        if mdev:
            rt_mod_host.import_module(mdev)

    if not isinstance(target_host, Target):
        target_host = Target(target_host)
    if (target_host.attrs.get("runtime", tvm.runtime.String("c++")) == "c"
            and target_host.attrs.get("system-lib", 0) == 1):
        if target_host.kind.name == "c":
            create_csource_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateCSourceCrtMetadataModule")
            return create_csource_crt_metadata_module([rt_mod_host],
                                                      target_host)

        if target_host.kind.name == "llvm":
            create_llvm_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateLLVMCrtMetadataModule")
            return create_llvm_crt_metadata_module([rt_mod_host], target_host)

    return rt_mod_host
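
For context, a compact end-to-end sketch of the Schedule path through this function, assuming an LLVM-enabled TVM build; the kernel and names are illustrative:

import numpy as np
import tvm
from tvm import te

n = 2
A = te.placeholder((n,), name="A")
B = te.placeholder((n,), name="B")
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
s = te.create_schedule(C.op)

# Schedule input: args must be given, and the result combines host and device code.
rt_mod = tvm.build(s, [A, B, C], target="llvm", name="vector_add")

dev = tvm.cpu(0)
a = tvm.nd.array(np.array([1.0, 2.0], dtype="float32"), dev)
b = tvm.nd.array(np.array([3.0, 4.0], dtype="float32"), dev)
c = tvm.nd.empty((n,), "float32", dev)
rt_mod["vector_add"](a, b, c)
print(c.numpy())  # [4. 6.]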
Example 7
def _build_for_device(input_mod, target, target_host):
    """Build the lowered functions for a device with the given compilation
    target.

    Parameters
    ----------
    input_mod : IRModule
        The input module to be built.

    target : str or :any:`tvm.target.Target`
        The target and option of the compilation.

    target_host : str or :any:`tvm.target.Target`
        The host compilation target.

    Returns
    -------
    mod_host : IRModule
        The host IRModule.

    rt_mod_dev : tvm.module
        A module that contains the device code, or None if no device code
        was generated.
    """
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)
    device_type = ndarray.device(target.kind.name, 0).device_type

    mod_mixed = input_mod
    mod_mixed = tvm.tir.transform.Apply(
        lambda f: f.with_attr("target", target))(mod_mixed)

    opt_mixed = [tvm.tir.transform.VerifyMemory()]
    if len(mod_mixed.functions) == 1:
        opt_mixed += [
            tvm.tir.transform.Apply(
                lambda f: f.with_attr("tir.is_entry_func", True))
        ]

    if PassContext.current().config.get("tir.detect_global_barrier", False):
        opt_mixed += [tvm.tir.transform.ThreadSync("global")]
    opt_mixed += [
        tvm.tir.transform.ThreadSync("shared"),
        tvm.tir.transform.ThreadSync("warp"),
        tvm.tir.transform.InferFragment(),
        tvm.tir.transform.LowerThreadAllreduce(),
        tvm.tir.transform.MakePackedAPI(),
        tvm.tir.transform.SplitHostDevice(),
    ]
    mod_mixed = tvm.transform.Sequential(opt_mixed)(mod_mixed)

    # device optimizations
    opt_device = tvm.transform.Sequential([
        tvm.tir.transform.Filter(
            lambda f: "calling_conv" in f.attrs and f.attrs[
                "calling_conv"].value == CallingConv.DEVICE_KERNEL_LAUNCH),
        tvm.tir.transform.LowerWarpMemory(),
        tvm.tir.transform.Simplify(),
        tvm.tir.transform.LowerDeviceStorageAccessInfo(),
        tvm.tir.transform.LowerCustomDatatypes(),
        tvm.tir.transform.LowerIntrin(),
    ])
    mod_dev = opt_device(mod_mixed)

    # host optimizations
    opt_host = tvm.transform.Sequential([
        tvm.tir.transform.Filter(
            lambda f: "calling_conv" not in f.attrs or f.attrs[
                "calling_conv"].value != CallingConv.DEVICE_KERNEL_LAUNCH),
        tvm.tir.transform.Apply(lambda f: f.with_attr("target", target_host)),
        tvm.tir.transform.LowerTVMBuiltin(),
        tvm.tir.transform.LowerDeviceStorageAccessInfo(),
        tvm.tir.transform.LowerCustomDatatypes(),
        tvm.tir.transform.LowerIntrin(),
        tvm.tir.transform.CombineContextCall(),
    ])
    mod_host = opt_host(mod_mixed)

    if device_type == ndarray.cpu(0).device_type and target_host == target:
        assert len(mod_dev.functions) == 0
    if "gpu" in target.keys and len(mod_dev.functions) == 0:
        warnings.warn(
            "Specified target %s, but cannot find device code, did you do "
            "bind?" % target)

    rt_mod_dev = codegen.build_module(
        mod_dev, target) if len(mod_dev.functions) != 0 else None
    return mod_host, rt_mod_dev
Example 8
def build(
    inputs: Union[schedule.Schedule, PrimFunc, IRModule, Mapping[str,
                                                                 IRModule]],
    args: Optional[List[Union[Buffer, tensor.Tensor, Var]]] = None,
    target: Optional[Union[str, Target]] = None,
    target_host: Optional[Union[str, Target]] = None,
    runtime: Optional[
        "tvm.relay.backend.Runtime"] = None,  # Type is annotated this way to avoid cyclic dependency
    name: Optional[str] = "default_function",
    binds: Optional[Mapping[tensor.Tensor, Buffer]] = None,
):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : Union[tvm.te.schedule.Schedule,
        tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]]
        The input to be built

    args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]]
        The argument lists to the function.

    target : Optional[Union[str, Target]]
        The target and option of the compilation.

    target_host : Optional[Union[str, Target]]
        Host compilation target, if target is a device target.
        When TVM compiles a device-specific program such as CUDA,
        we also need host (CPU) side code to interact with the driver
        and to set up the dimensions and parameters correctly.
        target_host is used to specify the host-side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    runtime : Optional[Runtime]
        The runtime to generate artifacts for.

    name : Optional[str]
        The name of result function.

    binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]]
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    --------
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:
    1. it is an IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.te.create_schedule(C.op)
        m = tvm.lower(s, [A, B, C], name="test_add")
        rt_mod = tvm.build(m, target="llvm")

    2. it is a dict of compilation target to IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.te.create_schedule(C.op)
        with tvm.target.cuda() as cuda_tgt:
          s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
          m1 = tvm.lower(s1, [A, B, C], name="test_add1")
          m2 = tvm.lower(s2, [A, B, C], name="test_add2")
          rt_mod = tvm.build({"llvm": m1, "cuda": m2})

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        input_mod = lower(inputs, args, name=name, binds=binds)
    elif isinstance(inputs, (list, tuple, container.Array)):
        merged_mod = tvm.IRModule({})
        for x in inputs:
            merged_mod.update(lower(x))
        input_mod = merged_mod
    elif isinstance(inputs, PrimFunc):
        input_mod = lower(inputs, name=name)
    elif isinstance(inputs, tvm.IRModule):
        if name is not None:
            warnings.warn("Specifying name with IRModule input is useless")
        input_mod = lower(inputs)
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError(
            f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
            f"but got {type(inputs)}.")

    if target_host is not None:
        warnings.warn(
            "target_host parameter is going to be deprecated. "
            "Please pass in tvm.target.Target(target, host=target_host) instead."
        )

    if not isinstance(inputs, (dict, container.Map)):
        target = Target.current() if target is None else target
        target = target if target else "llvm"
        target_input_mod = {target: input_mod}
    else:
        target_input_mod = inputs

    # Because modules can be created from a variety of sources, we annotate them
    # with the relevant attributes here to ensure they propagate
    annotated_mods = {}
    for tar, mod in target_input_mod.items():
        if not isinstance(tar, (str, Target)):
            raise ValueError("The key of inputs must be str or "
                             "Target when inputs is dict.")
        if not isinstance(mod, tvm.IRModule):
            raise ValueError("inputs must be Schedule, IRModule,"
                             "or dict of str to IRModule.")
        annotated_mods[tar] = mod.with_attr("runtime", runtime)

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    if not target_host:
        for tar, mod in annotated_mods.items():
            tar = Target(tar)
            device_type = ndarray.device(tar.kind.name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm"

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    rt_mod_host = _driver_ffi.preprocess_module(annotated_mods, target_host)

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    if not isinstance(target_host, Target):
        target_host = Target(target_host)

    if str(runtime) == "crt" and runtime["system-lib"]:
        if target_host.kind.name == "c":
            create_csource_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateCSourceCrtMetadataModule")
            to_return = create_csource_crt_metadata_module([rt_mod_host],
                                                           target_host,
                                                           runtime)
        elif target_host.kind.name == "llvm":
            create_llvm_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateLLVMCrtMetadataModule")
            to_return = create_llvm_crt_metadata_module([rt_mod_host],
                                                        target_host, runtime)
    else:
        to_return = rt_mod_host

    return OperatorModule.from_module(to_return,
                                      ir_module_by_target=annotated_mods,
                                      name=name)
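
A hedged sketch of how the runtime argument can be exercised, mirroring the runtime["system-lib"] check in the body above; Runtime("crt", {"system-lib": True}) and the "c" target both assume the corresponding backends are enabled in the TVM build and that tvm.build exposes the runtime parameter as in this signature:

import tvm
from tvm import te
from tvm.relay.backend import Runtime

n = 8
A = te.placeholder((n,), name="A")
B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
s = te.create_schedule(B.op)

# Generate C source targeting the CRT with a bundled system library.
crt_runtime = Runtime("crt", {"system-lib": True})
mod = tvm.build(s, [A, B], target="c", runtime=crt_runtime, name="add_one")
print(mod.type_key)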
Example 9
def _timed_eval_func(
    inp_serialized,
    build_res,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    # pylint: disable=import-outside-toplevel
    from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency

    inp = MeasureInput.deserialize(inp_serialized)
    task_input_names = inp.task.task_input_names
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We cannot get the PackedFunc directly in remote mode as it is wrapped
        # inside a std::function. We could lift the restriction later once we
        # fold the PackedFunc into an object. Currently, we pass the function
        # name to work around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT, )
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill",
                                              True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"

            tensor_input_map = prepare_input_map(
                build_res.args) if task_input_names else {}
            args = []
            task_inputs_count = 0
            for arg in build_res.args:
                if arg in tensor_input_map:
                    tensor_name = tensor_input_map[arg]
                    if tensor_name in task_input_names:
                        args.append(
                            ndarray.array(
                                get_task_input_buffer(inp.task.workload_key,
                                                      tensor_name), dev))
                        task_inputs_count += 1
                    else:
                        raise ValueError(
                            "%s not found in task_inputs, " % (tensor_name) +
                            "should provide with `SearchTask(..., task_inputs={...})`"
                        )
                else:
                    empty_array = ndarray.empty(get_const_tuple(arg.shape),
                                                arg.dtype, dev)
                    random_fill(empty_array)
                    args.append(empty_array)
            if task_inputs_count != len(task_input_names):
                logger.warning(
                    "task_inputs not fully matched, check if there's any unexpected error"
                )
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT, )
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
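
Outside the auto_scheduler plumbing, the time_evaluator pattern used above boils down to the following minimal sketch, assuming an LLVM-enabled build; sizes and names are illustrative:

import numpy as np
import tvm
from tvm import te

n = 1024
A = te.placeholder((n,), name="A")
B = te.compute((n,), lambda i: A[i] * 2.0, name="B")
s = te.create_schedule(B.op)
func = tvm.build(s, [A, B], target="llvm")

dev = tvm.cpu(0)
a = tvm.nd.array(np.random.rand(n).astype("float32"), dev)
b = tvm.nd.empty((n,), "float32", dev)

# number/repeat/min_repeat_ms mirror the arguments forwarded above;
# .results is the tuple of per-repeat mean runtimes in seconds.
timer = func.time_evaluator(func.entry_name, dev, number=10, repeat=3, min_repeat_ms=0)
print(timer(a, b).results)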
Example 10
def _timed_eval_func(
    inp_serialized,
    build_res,
    args,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We cannot get the PackedFunc directly in remote mode as it is wrapped
        # inside a std::function. We could lift the restriction later once we
        # fold the PackedFunc into an object. Currently, we pass the function
        # name to work around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT, )
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill",
                                              True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
            assert len(args) == len(build_res.args)
            # pylint: disable=consider-using-enumerate
            for idx in range(len(args)):
                if args[idx] is None:
                    build_res_arg = build_res.args[idx]
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape),
                        build_res_arg.dtype, dev)
                    random_fill(empty_array)
                    args[idx] = empty_array
                else:
                    args[idx] = ndarray.array(args[idx], dev)
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT, )
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc