Example #1
def _build_func_common(measure_input, runtime=None, check_gpu=None, build_option=None):
    """Common part for building a configuration"""
    target, task, config = measure_input
    target, task.target_host = Target.check_and_update_host_consist(target, task.target_host)

    with target:
        s, args = task.instantiate(config)

        # check that the config instantiation is valid and the code hash is consistent
        if not config.valid():
            raise InstantiationError(config.errors)

        opts = build_option or {}
        if check_gpu:  # Add verify pass to filter out invalid configs in advance.
            opts["tir.add_lower_pass"] = [(2, gpu_verify_pass(**check_gpu))]

        # if target is vta, we need to use vta build
        if (
            hasattr(measure_input.target, "device_name")
            and measure_input.target.device_name == "vta"
        ):
            # pylint: disable=import-outside-toplevel
            import vta

            func = vta.build(s, args, target_host=task.target_host)
        else:
            with tvm.ir.transform.PassContext(config=opts):
                func = build(s, args, target_host=task.target_host, runtime=runtime)
    return func, tuple((get_const_tuple(x.shape), x.dtype) for x in args)
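For context, the measure_input consumed above is AutoTVM's (target, task, config) triple. Below is a minimal, hedged sketch of assembling one and invoking the helper; it assumes an existing AutoTVM task, and the use of autotvm.measure.MeasureInput and config_space.get is an assumption based on the surrounding AutoTVM APIs, not part of this snippet.

from tvm import autotvm

# Assumes `task` was created via autotvm.task.create(...) as in Example #4 below.
config = task.config_space.get(0)  # pick one candidate configuration
measure_input = autotvm.measure.MeasureInput(task.target, task, config)

# Compile the candidate and collect (shape, dtype) info for its arguments.
func, arg_info = _build_func_common(measure_input)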
Example #2
    def __init__(
        self,
        func=None,
        args=None,
        compute_dag=None,
        workload_key=None,
        target=None,
        target_host=None,
        hardware_params=None,
        layout_rewrite_option=None,
        task_inputs=None,
        task_inputs_overwrite=False,
        task_inputs_save_to_file=False,
        desc="",
    ):
        assert (
            func is not None or workload_key is not None
        ), "Either a workload generation function or a workload key should be provided"

        if func is not None:
            workload_key = make_workload_key(func, args)
        if compute_dag is None:
            compute_dag = ComputeDAG(workload_key)

        assert target is not None, "Must specify a target."

        target, target_host = Target.check_and_update_host_consist(
            target, target_host)

        if layout_rewrite_option is None:
            layout_rewrite_option = LayoutRewriteOption.get_target_default(
                target)

        task_input_names = []
        if isinstance(task_inputs, list):
            task_input_names = task_inputs
        elif isinstance(task_inputs, dict):
            for input_name in task_inputs:
                register_task_input_buffer(
                    workload_key,
                    input_name,
                    task_inputs[input_name],
                    task_inputs_overwrite,
                    task_inputs_save_to_file,
                )
                task_input_names.append(input_name)
        elif task_inputs is not None:
            raise ValueError("task_inputs should be a dict or a list.")

        self.__init_handle_by_constructor__(
            _ffi_api.SearchTask,
            compute_dag,
            workload_key,
            target,
            target_host,
            hardware_params,
            layout_rewrite_option,
            task_input_names,
            desc,
        )
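As a usage illustration (not taken from the snippet above), a SearchTask is typically constructed from a workload function registered with the auto-scheduler; the sketch below assumes the standard auto_scheduler.register_workload decorator and an LLVM target.

import tvm
from tvm import te, auto_scheduler


@auto_scheduler.register_workload  # assumed public registration decorator
def matmul(N, M, K):
    A = te.placeholder((N, K), name="A")
    B = te.placeholder((K, M), name="B")
    k = te.reduce_axis((0, K), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    return [A, B, C]


# func/args are hashed into workload_key internally via make_workload_key.
task = auto_scheduler.SearchTask(
    func=matmul, args=(128, 128, 128), target=tvm.target.Target("llvm")
)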
Example #3
    def lower(self, mod, target=None, target_host=None):
        """Lower the module to VM bytecode.

        Parameters
        ----------
        mod : tvm.IRModule
            The Relay module to build.

        target : str, :any:`tvm.target.Target`, or dict of str(i.e.
            device/context name) to str/tvm.target.Target, optional
            For heterogeneous compilation, it is a dictionary indicating context
            to target mapping. For homogeneous compilation, it is a build target.

        target_host : str or :any:`tvm.target.Target`, optional
            Host compilation target, if target is device.
            When TVM compiles device specific program such as CUDA,
            we also need host(CPU) side code to interact with the driver
            to setup the dimensions and parameters correctly.
            target_host is used to specify the host side codegen target.
            By default, llvm is used if it is enabled,
            otherwise a stackvm interpreter is used.
        """
        target = self._update_target(target)
        target_host = self._update_target_host(target, target_host)
        target, target_host = Target.check_and_update_host_consist(
            target, target_host, target_is_dict_key=False
        )

        tophub_context = self._tophub_context(target)
        with tophub_context:
            self._lower(mod, target, target_host)
Example #4
def get_sample_task(target=tvm.target.cuda(), target_host=None):
    """Return a sample task for testing"""
    target, target_host = Target.check_and_update_host_consist(target, target_host)
    task = autotvm.task.create(
        "testing/conv2d_no_batching", args=(1, 7, 7, 512, 512, 3, 3), target=target
    )
    return task, target
Example #5
def test_check_and_update_host_consist_1():
    target = None
    host = "llvm"
    with pytest.raises(
            AssertionError,
            match=r"Target host is not empty when target is empty."):
        target, host = Target.check_and_update_host_consist(target, host)
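For contrast with the failing case above, here is a hedged sketch of the consistent path, in which the host is folded into target.host and also returned separately, mirroring the assertions in the later tests in this file. It is an illustrative sketch, not the project's actual test of this name.

def test_check_and_update_host_consist_2():
    # Sketch: a device target plus an explicit host should merge without raising.
    target = Target("cuda")
    host = Target("llvm")
    target, host = Target.check_and_update_host_consist(target, host)
    assert target.kind.name == "cuda"
    assert target.host.kind.name == "llvm"
    assert host.kind.name == "llvm"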
Example #6
    def __setstate__(self, state):
        # Register the workload if needed
        try:
            workload = json.loads(state["workload_key"])
        except Exception:  # pylint: disable=broad-except
            raise RuntimeError("Invalid workload key %s" % state["workload_key"])

        # workload[0] is either the compute function name or the ComputeDAG hash.
        # The compute functions are already registered when importing TVM, so here
        # we only register the ComputeDAG workloads. If the same workload has
        # already been registered, the later registration overrides the previous one.
        if workload[0] not in WORKLOAD_FUNC_REGISTRY:
            register_workload_tensors(state["workload_key"], state["compute_dag"].tensors)

        state["target"], state["target_host"] = Target.check_and_update_host_consist(
            state["target"], state["target_host"]
        )
        self.__init_handle_by_constructor__(
            _ffi_api.SearchTask,
            state["compute_dag"],
            state["workload_key"],
            state["target"],
            state["target"].host,
            state["hardware_params"],
            state["layout_rewrite_option"],
            state["task_input_names"],
            state["desc"],
        )
Example #7
def _build_module_no_factory(mod, target=None, target_host=None, params=None, mod_name="default"):
    """A wrapper around build which discards the Python GraphFactoryRuntime.
    This wrapper is suitable to be used from other programming languages as
    the runtime::Module can be freely passed between language boundaries.
    """
    target, target_host = Target.check_and_update_host_consist(target, target_host)
    return build(mod, target, params=params, mod_name=mod_name).module
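A hedged usage sketch for the wrapper above, assuming a small example network from tvm.relay.testing; the choice of workload and target string is illustrative only.

import tvm
from tvm.relay import testing

# Assumed example workload; any Relay IRModule plus params would do.
mod, params = testing.mlp.get_workload(batch_size=1)

# Returns the bare runtime::Module, with the Python factory wrapper discarded,
# so it can be handed across language boundaries.
rt_mod = _build_module_no_factory(mod, target="llvm", params=params)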
Example #8
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    parser.add_argument("--target",
                        type=str,
                        default="llvm",
                        help="The build target")
    parser.add_argument("--target-host",
                        type=str,
                        default=None,
                        help="The host code compilation target")
    parser.add_argument("--rpc-host",
                        type=str,
                        default="127.0.0.1",
                        help="the hostname of the server")
    parser.add_argument("--rpc-port",
                        type=int,
                        default=9090,
                        help="The port of the RPC")

    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    args.target, args.target_host = Target.check_and_update_host_consist(
        args.target, args.target_host)
    measure_peak_all(args.target, args.target_host, args.rpc_host,
                     args.rpc_port)
Example #9
def extract_from_program(mod, params, target, target_host=None, ops=None):
    """Extract tuning tasks from a relay program.

    This function is the single program version of extract_from_multiple_program.

    Parameters
    ----------
    mod: tvm.IRModule or relay.function.Function
        The module or function to tune
    params: dict of str to numpy array
        The associated parameters of the program
    target: tvm.target.Target
        The compilation target
    target_host: tvm.target.Target
        The host compilation target
    ops: List[tvm.ir.Op] or None
        List of relay ops to be tuned. If not specified, all tunable ops will be extracted.

    Returns
    -------
    task: Array of autotvm.task.Task
        collected tasks
    """
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)
    return extract_from_multiple_program([mod], [params], target, ops=ops)
Example #10
def test_check_and_update_host_consist_3():
    target = Target(target="cuda", host="llvm")
    host = None
    target, host = Target.check_and_update_host_consist(target, host)
    assert target.kind.name == "cuda"
    assert target.host.kind.name == "llvm"
    assert host.kind.name == "llvm"
    assert target.host == host
Example #11
def measure_bandwidth_all_types(
    total_item, item_per_thread, n_times, target, target_host, remote, dev, verbose=True
):
    """measure memory bandwidth for all types

    Parameters
    ----------
    total_item: int
        number of elements in input array
    item_per_thread: int
        number of elements each thread accumulates
    n_times: int
        number of runs for averaging
    target: :any:`tvm.target.Target`
        the target and option of the compilation.
    target_host : str or :any:`tvm.target.Target`
        host compilation target
    remote: tvm.rpc.RPCSession
        remote rpc session
    dev: Device
        the device of array
    verbose: bool
        whether to log intermediate results

    Returns
    -------
    result: list
        a list of (type_name, GBPS) pairs
    """
    target, target_host = Target.check_and_update_host_consist(target, target_host)
    max_threads = target.max_num_threads

    result = []
    for base_type in ["float"]:
        for bits in [32]:
            for lanes in [1, 2, 4, 8, 16]:
                max_speed = -1e9
                # try different strides
                for stride in [max_threads, total_item // (lanes * item_per_thread)]:
                    speed = measure_bandwidth_sum(
                        total_item,
                        item_per_thread,
                        stride,
                        base_type,
                        bits,
                        lanes,
                        target,
                        target_host,
                        remote,
                        dev,
                        n_times,
                    )
                    max_speed = max(max_speed, speed)
                type_name = base_type + str(bits)
                result.append(["%sx%d" % (type_name, lanes), max_speed])
                if verbose:
                    logging.info("\t%-10s %.2f GBPS", result[-1][0], result[-1][1])
    return result
Example #12
    def build(self, mod, target=None, target_host=None, params=None):
        """
        Parameters
        ----------
        mod : :py:class:`~tvm.IRModule`
            The IRModule to build.

        target : str, :any:`tvm.target.Target`, or dict of str(i.e.
            device/context name) to str/tvm.target.Target, optional
            For heterogeneous compilation, it is a dictionary indicating context
            to target mapping. For homogeneous compilation, it is a build target.

        target_host : str or :any:`tvm.target.Target`, optional
            Host compilation target, if target is device.
            When TVM compiles device specific program such as CUDA,
            we also need host(CPU) side code to interact with the driver
            to setup the dimensions and parameters correctly.
            target_host is used to specify the host side codegen target.
            By default, llvm is used if it is enabled,
            otherwise a stackvm interpreter is used.

        params : dict of str to NDArray
            Input parameters to the graph that do not change
            during inference time. Used for constant folding.

        Returns
        -------
        graph_json : str
            The JSON string of the compiled graph, consumed by the graph executor.

        mod : tvm.Module
            The module containing the compiled functions.

        params : dict of str to NDArray
            The parameters of the final graph.
        """
        target = _update_target(target)
        target, target_host = Target.check_and_update_host_consist(
            target, target_host, target_is_dict_key=False)

        # Setup the params.
        if params:
            self._set_params(params)

        # Build the IR module. If auto_scheduler is not enabled,
        # then use the TOPI-defined schedule.
        use_auto_scheduler = PassContext.current().config.get(
            "relay.backend.use_auto_scheduler", False)

        # Turn off AutoTVM config not found warnings if auto_scheduler is enabled.
        old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
        autotvm.GLOBAL_SCOPE.silent = use_auto_scheduler

        self._build(mod, target, target_host)
        autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent

        # Get artifacts
        graph_json = self.get_json()
        mod = self.get_module()
        params = self.get_params()

        return graph_json, mod, params
Example #13
def test_check_and_update_host_consist_4():
    """Test `check_and_update_host_consist` by using TVM Objects"""
    cuda_device_type = tvm.device("cuda").device_type
    target = {cuda_device_type: Target(target="cuda", host="llvm")}
    host = None
    target_1, host_1 = Target.check_and_update_host_consist(target, host)
    assert isinstance(target_1, dict)
    assert target_1[cuda_device_type].kind.name == "cuda"
    assert target_1[cuda_device_type].host.kind.name == "llvm"
    assert host_1 is None

    target = {cuda_device_type: Target(tvm.runtime.container.String("cuda"))}
    host = Target(tvm.runtime.container.String("llvm"))
    target = tvm.runtime.convert(target)
    assert isinstance(target, tvm.ir.container.Map)
    target_2, host_2 = Target.check_and_update_host_consist(target, host)
    assert isinstance(target_2, dict)
    assert target_2[cuda_device_type].kind.name == "cuda"
    assert host_2.kind.name == "llvm"
Example #14
    def __setstate__(self, state):
        import cloudpickle  # pylint: disable=import-outside-toplevel

        self.name = state["name"]
        self.args = state["args"]
        self.kwargs = state["kwargs"]
        self.config_space = state["config_space"]
        self.func = cloudpickle.loads(state["func"])
        self.flop = state["flop"]
        self.target, self.target_host = Target.check_and_update_host_consist(
            state["target"], state["target_host"])
Example #15
    def __getstate__(self):
        self.target, self.target_host = Target.check_and_update_host_consist(
            self.target, self.target_host)
        return {
            "compute_dag": self.compute_dag,
            "workload_key": self.workload_key,
            "target": self.target,
            "target_host": self.target_host,
            "hardware_params": self.hardware_params,
            "layout_rewrite_option": self.layout_rewrite_option,
            "task_input_names": self.task_input_names,
            "desc": self.desc,
        }
Example #16
def autoscheduler_get_tuning_tasks(
    mod: tvm.IRModule,
    params: Dict[str, tvm.nd.NDArray],
    target: str,
    target_host: Optional[str] = None,
    alter_layout: Optional[str] = None,
    hardware_params: Optional[HardwareParams] = None,
    include_simple_tasks: bool = False,
):
    """Get the autoscheduler tuning tasks for a given relay module.

    Parameters
    ----------
    mod : tvm.IRModule
        The relay module from which to extract tuning tasks.
    params : dict
        The params for the relay module.
    target : tvm.target.Target
        The compilation target.
    target_host : str, optional
        The compilation target for the host.
    alter_layout : str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    hardware_params : Optional[HardwareParams]
        Hardware parameters used for the search tasks

    Returns
    -------
    tasks : list of autotvm.Tasks
        list of tasks to be tuned
    weights : List[int]
        the weight (i.e. the number of appearance) of extracted tasks
    """
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    # Extract the tasks
    tasks, task_weights = auto_scheduler.extract_tasks(
        mod["main"],
        params,
        target=target,
        hardware_params=hardware_params,
        include_simple_tasks=include_simple_tasks,
    )

    return tasks, task_weights
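A hedged sketch of driving the extraction helper above; the example network and the cuda/llvm target split are assumptions for illustration.

import tvm
from tvm.relay import testing

# Assumed example workload with a "main" function to extract tasks from.
mod, params = testing.resnet.get_workload(num_layers=18, batch_size=1)

tasks, weights = autoscheduler_get_tuning_tasks(
    mod=mod,
    params=params,
    target="cuda",
    target_host="llvm",  # folded into target.host by check_and_update_host_consist
)
print("extracted %d tasks" % len(tasks))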
Example #17
def compile(mod, target=None, target_host=None, params=None):
    """Compile the module to VM executable. A helper function for VMCompiler.

    Parameters
    ----------
    mod : tvm.IRModule
        The Relay module to build.

    target : str, :any:`tvm.target.Target`, or dict of str(i.e.
        device/context name) to str/tvm.target.Target, optional
        For heterogeneous compilation, it is a dictionary indicating context
        to target mapping. For homogeneous compilation, it is a build target.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        to setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    Returns
    -------
    exec : tvm.runtime.vm.Executable
        The VM executable that contains both library code and bytecode.
    """
    if target_host is not None:
        warnings.warn(
            "target_host parameter is going to be deprecated. "
            "Please pass in tvm.target.Target(target, host=target_host) instead."
        )
    target, target_host = Target.check_and_update_host_consist(
        target, target_host, target_is_dict_key=False
    )
    compiler = VMCompiler()
    if params:
        compiler.set_params(params)
    compiler.lower(mod, target)
    compiler.codegen()
    return compiler.get_exec()
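A hedged usage sketch for compile, following the deprecation note above by carrying the host inside the Target object instead of passing target_host; the example workload is an assumption.

import tvm
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)

# Preferred spelling per the deprecation warning: the host lives on the Target.
tgt = tvm.target.Target("cuda", host="llvm")
exe = compile(mod, target=tgt, params=params)  # tvm.runtime.vm.Executable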
Example #18
    def __getstate__(self):
        # custom pickle implementation is required for
        # some unpickable local task functions.
        # So we only pickle the name of the function
        # and restore the function by name when unpickling it.
        import cloudpickle  # pylint: disable=import-outside-toplevel

        self.target, self.target_host = Target.check_and_update_host_consist(
            self.target, self.target_host)
        return {
            "name": self.name,
            "args": self.args,
            "kwargs": self.kwargs,
            "config_space": self.config_space,
            "flop": self.flop,
            "target": self.target,
            "target_host": self.target_host,
            "func": cloudpickle.dumps(self.func),
        }
Example #19
def _local_build_worker(inp_serialized, build_func, verbose):
    tic = time.time()
    inp = MeasureInput.deserialize(inp_serialized)
    task = inp.task
    task.target, task.target_host = Target.check_and_update_host_consist(
        task.target, task.target_host
    )

    error_no = MeasureErrorNo.NO_ERROR
    error_msg = None
    args = []

    try:
        sch, args = task.compute_dag.apply_steps_from_state(
            inp.state, layout_rewrite=task.layout_rewrite_option
        )
    # pylint: disable=broad-except
    except Exception:
        error_no = MeasureErrorNo.INSTANTIATION_ERROR
        error_msg = make_traceback_info()

    if error_no == 0:
        dirname = tempfile.mkdtemp()
        filename = os.path.join(dirname, "tmp_func." + build_func.output_format)

        try:
            with transform.PassContext():
                func = build_module.build(sch, args, target=task.target)
            func.export_library(filename, build_func)
        # pylint: disable=broad-except
        except Exception:
            error_no = MeasureErrorNo.COMPILE_HOST
            error_msg = make_traceback_info()
    else:
        filename = ""

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print(".", end="", flush=True)
        else:
            print(".E", end="", flush=True)  # Build error

    return filename, args, error_no, error_msg, time.time() - tic
Example #20
def autotvm_get_tuning_tasks(
    mod: tvm.IRModule,
    params: Dict[str, tvm.nd.NDArray],
    target: str,
    target_host: Optional[str] = None,
    alter_layout: Optional[str] = None,
):
    """Get the autotvm tuning tasks for a given relay module.

    Parameters
    ----------
    mod : tvm.IRModule
        The relay module from which to extract tuning tasks.
    params : dict
        The params for the relay module.
    target : tvm.target.Target
        The compilation target.
    target_host : str, optional
        The compilation target for the host.
    alter_layout : str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.

    Returns
    -------
    tasks : list of autotvm.Tasks
        list of tasks to be tuned
    """
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
    )

    return tasks
Example #21
def create(task_name, args, target, target_host=None):
    """Create a tuning task and initialize its search space

    Parameters
    ----------
    task_name : str
        The AutoTVM task name
    args : List
        Positional arguments
    target : Target
        The compilation target
    target_host: Target, optional
        The compilation target for host side

    Returns
    -------
    tsk: Task
        a task object
    """
    args = serialize_args(args)
    ret = Task(task_name, args)

    if isinstance(target, str):
        target = Target(target)

    target, target_host = Target.check_and_update_host_consist(
        target, target_host)

    # init config space
    ret.config_space = ConfigSpace()

    ctx = ApplyConfig(ret.config_space)
    with ctx:
        with target:
            sch, _ = ret.func(*args)
            ret.config_space.code_hash = getattr(sch, "code_hash", None)

    ret.flop = ret.config_space.flop or compute_flop(sch)
    ret.target = target
    ret.target_host = target_host

    return ret
Example #22
    def optimize(self, mod, target=None, target_host=None, params=None):
        """Helper method that optimizes a Relay module via VM.

        Parameters
        ----------
        mod : tvm.IRModule

        target : str, :any:`tvm.target.Target`, or dict of str (i.e.
            device/context name) to str/tvm.target.Target, optional

        target_host : str or :any:`tvm.target.Target`, optional
            The compilation target for host.
            By default, llvm is used if it is enabled,
            otherwise a stackvm interpreter is used.

        params : dict of str to NDArray
            Input parameters to the graph that do not change
            during inference time. Used for constant folding.

        Returns
        -------
        mod : tvm.IRModule
            The optimized relay module.

        params : dict
            The parameters of the final module.
        """
        if target_host is not None:
            warnings.warn(
                "target_host parameter is going to be deprecated. "
                "Please pass in tvm.target.Target(target, host=target_host) instead."
            )
        target = self._update_target(target)
        target_host = self._update_target_host(target, target_host)
        target, target_host = Target.check_and_update_host_consist(
            target, target_host, target_is_dict_key=False
        )

        if params:
            self.set_params(params)
        return self._optimize(mod, target, target_host), self.get_params()
Example #23
def recover_measure_input(inp, rebuild_state=False):
    """
    Recover a deserialized MeasureInput by rebuilding the missing fields.
    1. Rebuild the compute_dag in inp.task
    2. (Optional) Rebuild the stages in inp.state

    Parameters
    ----------
    inp: MeasureInput
        The deserialized MeasureInput
    rebuild_state: bool = False
        Whether to rebuild the stages in MeasureInput.State

    Returns
    -------
    new_input: MeasureInput
        The fully recovered MeasureInput with all fields rebuilt.
    """
    # pylint: disable=import-outside-toplevel
    from .search_task import SearchTask  # lazily import to avoid recursive dependency

    task = inp.task
    task.target, task.target_host = Target.check_and_update_host_consist(
        task.target, task.target_host
    )
    new_task = SearchTask(
        workload_key=task.workload_key,
        target=task.target,
        hardware_params=task.hardware_params,
        layout_rewrite_option=task.layout_rewrite_option,
        task_inputs=list(task.task_input_names),
    )

    if rebuild_state:
        new_state = new_task.compute_dag.infer_bound_from_state(inp.state)
    else:
        new_state = inp.state

    return MeasureInput(new_task, new_state)
Example #24
def measure_peak_all(target, target_host, host, port):
    """measure memory bandwidth and peak compute for gpu devices

    Parameters
    ----------
    target: str or :any:`tvm.target.Target`
    target_host: str
    host: str
    port: int
    """

    target, target_host = Target.check_and_update_host_consist(target, target_host)
    remote = rpc.connect(host, port)
    n_times = 20

    bandwidth_total_item = 1 << 25
    bandwidth_item_per_thread = 32

    compute_total_item = 1 << 21
    compute_item_per_thread = 4096

    if str(target).startswith("opencl"):
        dev = remote.cl()
    elif str(target).startswith("cuda"):
        dev = remote.cuda()
    elif str(target).startswith("metal"):
        dev = remote.metal()
    else:
        raise RuntimeError("Unsupported target")

    logging.info("========== measure memory bandwidth ==========")
    measure_bandwidth_all_types(
        bandwidth_total_item, bandwidth_item_per_thread, n_times, target, target_host, remote, dev
    )

    logging.info("========== measure peak compute ==========")
    measure_compute_all_types(
        compute_total_item, compute_item_per_thread, n_times, target, target_host, remote, dev
    )
Example #25
def build(mod, target, target_host=None):
    """Backend build function.

    Parameters
    ----------
    mod : tvm.IRModule or Dict[str, tvm.IRModule]
        Input module

    target : tvm.Target
        The target to run the code on.

    target_host : tvm.Target
        The host target.

    Returns
    -------
    module : tvm.Module
        The runtime module.
    """
    target_host = None if target_host == "" else target_host
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)
    return tvm.driver.build(mod, target=target)
Example #26
def build(inputs,
          args=None,
          target=None,
          target_host=None,
          name="default_function",
          binds=None):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : tvm.te.Schedule, IRModule, or dict of target to IRModule
        The input to be built

    args : list of Buffer or Tensor or Var, optional
        The argument lists to the function.

    target : str or :any:`tvm.target.Target`, optional
        The target and option of the compilation.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        to setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    name : str, optional
        The name of result function.

    binds : dict, optional
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    ________
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:
    1. it is an IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.te.create_schedule(C.op)
        m = tvm.lower(s, [A, B, C], name="test_add")
        rt_mod = tvm.build(m, target="llvm")

    2. it is a dict of compilation target to IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.te.create_schedule(C.op)
        with tvm.target.cuda() as cuda_tgt:
          s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
          m1 = tvm.lower(s1, [A, B, C], name="test_add1")
          m2 = tvm.lower(s2, [A, B, C], name="test_add2")
          rt_mod = tvm.build({"llvm": m1, "cuda": m2}, target_host="llvm")

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        input_mod = lower(inputs, args, name=name, binds=binds)
    elif isinstance(inputs, (list, tuple, container.Array)):
        merged_mod = tvm.IRModule({})
        for x in inputs:
            merged_mod.update(x)
        input_mod = merged_mod
    elif isinstance(inputs, tvm.IRModule):
        input_mod = inputs
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError(
            f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
            f"but got {type(inputs)}.")

    if not isinstance(inputs, (dict, container.Map)):
        target = Target.current() if target is None else target
        target = target if target else "llvm"
        target_input_mod = {target: input_mod}
    else:
        target_input_mod = inputs

    for tar, mod in target_input_mod.items():
        if not isinstance(tar, (str, Target)):
            raise ValueError("The key of inputs must be str or "
                             "Target when inputs is dict.")
        if not isinstance(mod, tvm.IRModule):
            raise ValueError("inputs must be Schedule, IRModule,"
                             "or dict of str to IRModule.")

    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host)

    if not target_host:
        for tar, mod in target_input_mod.items():
            tar = Target(tar)
            device_type = ndarray.device(tar.kind.name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm"

    target_input_mod, target_host = Target.check_and_update_host_consist(
        target_input_mod, target_host)

    mod_host_all = tvm.IRModule({})

    device_modules = []
    for tar, input_mod in target_input_mod.items():
        mod_host, mdev = _build_for_device(input_mod, tar, target_host)
        mod_host_all.update(mod_host)
        device_modules.append(mdev)

    # Generate a unified host module.
    rt_mod_host = codegen.build_module(mod_host_all, target_host)

    # Import all modules.
    for mdev in device_modules:
        if mdev:
            rt_mod_host.import_module(mdev)

    if not isinstance(target_host, Target):
        target_host = Target(target_host)
    if (target_host.attrs.get("runtime", tvm.runtime.String("c++")) == "c"
            and target_host.attrs.get("system-lib", 0) == 1):
        if target_host.kind.name == "c":
            create_csource_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateCSourceCrtMetadataModule")
            return create_csource_crt_metadata_module([rt_mod_host],
                                                      target_host)

        if target_host.kind.name == "llvm":
            create_llvm_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateLLVMCrtMetadataModule")
            return create_llvm_crt_metadata_module([rt_mod_host], target_host)

    return rt_mod_host
Example #27
def _build_for_device(input_mod, target, target_host):
    """Build the lowered functions for a device with the given compilation
    target.

    Parameters
    ----------
    input_mod : IRModule
        The input module to be built.

    target : str or :any:`tvm.target.Target`
        The target and option of the compilation.

    target_host : str or :any:`tvm.target.Target`
        The host compilation target.

    Returns
    -------
    fhost : IRModule
        The host IRModule.

    mdev : tvm.module
        A module that contains device code.
    """
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)
    device_type = ndarray.device(target.kind.name, 0).device_type

    mod_mixed = input_mod
    mod_mixed = tvm.tir.transform.Apply(
        lambda f: f.with_attr("target", target))(mod_mixed)

    opt_mixed = [tvm.tir.transform.VerifyMemory()]
    if len(mod_mixed.functions) == 1:
        opt_mixed += [
            tvm.tir.transform.Apply(
                lambda f: f.with_attr("tir.is_entry_func", True))
        ]

    if PassContext.current().config.get("tir.detect_global_barrier", False):
        opt_mixed += [tvm.tir.transform.ThreadSync("global")]
    opt_mixed += [
        tvm.tir.transform.ThreadSync("shared"),
        tvm.tir.transform.ThreadSync("warp"),
        tvm.tir.transform.InferFragment(),
        tvm.tir.transform.LowerThreadAllreduce(),
        tvm.tir.transform.MakePackedAPI(),
        tvm.tir.transform.SplitHostDevice(),
    ]
    mod_mixed = tvm.transform.Sequential(opt_mixed)(mod_mixed)

    # device optimizations
    opt_device = tvm.transform.Sequential([
        tvm.tir.transform.Filter(
            lambda f: "calling_conv" in f.attrs and f.attrs[
                "calling_conv"].value == CallingConv.DEVICE_KERNEL_LAUNCH),
        tvm.tir.transform.LowerWarpMemory(),
        tvm.tir.transform.Simplify(),
        tvm.tir.transform.LowerDeviceStorageAccessInfo(),
        tvm.tir.transform.LowerCustomDatatypes(),
        tvm.tir.transform.LowerIntrin(),
    ])
    mod_dev = opt_device(mod_mixed)

    # host optimizations
    opt_host = tvm.transform.Sequential([
        tvm.tir.transform.Filter(
            lambda f: "calling_conv" not in f.attrs or f.attrs[
                "calling_conv"].value != CallingConv.DEVICE_KERNEL_LAUNCH),
        tvm.tir.transform.Apply(lambda f: f.with_attr("target", target_host)),
        tvm.tir.transform.LowerTVMBuiltin(),
        tvm.tir.transform.LowerDeviceStorageAccessInfo(),
        tvm.tir.transform.LowerCustomDatatypes(),
        tvm.tir.transform.LowerIntrin(),
        tvm.tir.transform.CombineContextCall(),
    ])
    mod_host = opt_host(mod_mixed)

    if device_type == ndarray.cpu(0).device_type and target_host == target:
        assert len(mod_dev.functions) == 0
    if "gpu" in target.keys and len(mod_dev.functions) == 0:
        warnings.warn(
            "Specified target %s, but cannot find device code, did you do "
            "bind?" % target)

    rt_mod_dev = codegen.build_module(
        mod_dev, target) if len(mod_dev.functions) != 0 else None
    return mod_host, rt_mod_dev
Example #28
def build(
    inputs: Union[schedule.Schedule, PrimFunc, IRModule, Mapping[str,
                                                                 IRModule]],
    args: Optional[List[Union[Buffer, tensor.Tensor, Var]]] = None,
    target: Optional[Union[str, Target]] = None,
    target_host: Optional[Union[str, Target]] = None,
    runtime: Optional[
        "tvm.relay.backend.Runtime"] = None,  # Type is annotated this way to avoid cyclic dependency
    name: Optional[str] = "default_function",
    binds: Optional[Mapping[tensor.Tensor, Buffer]] = None,
):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : Union[tvm.te.schedule.Schedule,
        tvm.tir.PrimFunc, IRModule, Mapping[str, IRModule]]
        The input to be built

    args : Optional[List[Union[tvm.tir.Buffer, tensor.Tensor, Var]]]
        The argument lists to the function.

    target : Optional[Union[str, Target]]
        The target and option of the compilation.

    target_host : Optional[Union[str, Target]]
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        to setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    runtime : Optional[Runtime]
        Runtime to generate artifacts for

    name : Optional[str]
        The name of result function.

    binds : Optional[Mapping[tensor.Tensor, tvm.tir.Buffer]]
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    ________
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:
    1. it is an IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.te.create_schedule(C.op)
        m = tvm.lower(s, [A, B, C], name="test_add")
        rt_mod = tvm.build(m, target="llvm")

    2. it is a dict of compilation target to IRModule.

    .. code-block:: python

        n = 2
        A = te.placeholder((n,), name='A')
        B = te.placeholder((n,), name='B')
        C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.te.create_schedule(C.op)
        with tvm.target.cuda() as cuda_tgt:
          s2 = topi.cuda.schedule_injective(cuda_tgt, [C])
          m1 = tvm.lower(s1, [A, B, C], name="test_add1")
          m2 = tvm.lower(s2, [A, B, C], name="test_add2")
          rt_mod = tvm.build({"llvm": m1, "cuda": m2})

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        input_mod = lower(inputs, args, name=name, binds=binds)
    elif isinstance(inputs, (list, tuple, container.Array)):
        merged_mod = tvm.IRModule({})
        for x in inputs:
            merged_mod.update(lower(x))
        input_mod = merged_mod
    elif isinstance(inputs, PrimFunc):
        input_mod = lower(inputs, name=name)
    elif isinstance(inputs, tvm.IRModule):
        if name is not None:
            warnings.warn("Specifying name with IRModule input is useless")
        input_mod = lower(inputs)
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError(
            f"Inputs must be Schedule, IRModule or dict of target to IRModule, "
            f"but got {type(inputs)}.")

    if target_host is not None:
        warnings.warn(
            "target_host parameter is going to be deprecated. "
            "Please pass in tvm.target.Target(target, host=target_host) instead."
        )

    if not isinstance(inputs, (dict, container.Map)):
        target = Target.current() if target is None else target
        target = target if target else "llvm"
        target_input_mod = {target: input_mod}
    else:
        target_input_mod = inputs

    # Because modules can be created from a variety of sources, we annotate them
    # with the relevant attributes here to ensure they propagate
    annotated_mods = {}
    for tar, mod in target_input_mod.items():
        if not isinstance(tar, (str, Target)):
            raise ValueError("The key of inputs must be str or "
                             "Target when inputs is dict.")
        if not isinstance(mod, tvm.IRModule):
            raise ValueError("inputs must be Schedule, IRModule,"
                             "or dict of str to IRModule.")
        annotated_mods[tar] = mod.with_attr("runtime", runtime)

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    if not target_host:
        for tar, mod in annotated_mods.items():
            tar = Target(tar)
            device_type = ndarray.device(tar.kind.name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if tvm.runtime.enabled("llvm") else "stackvm"

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    rt_mod_host = _driver_ffi.preprocess_module(annotated_mods, target_host)

    annotated_mods, target_host = Target.check_and_update_host_consist(
        annotated_mods, target_host)

    if not isinstance(target_host, Target):
        target_host = Target(target_host)

    if str(runtime) == "crt" and runtime["system-lib"]:
        if target_host.kind.name == "c":
            create_csource_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateCSourceCrtMetadataModule")
            to_return = create_csource_crt_metadata_module([rt_mod_host],
                                                           target_host,
                                                           runtime)
        elif target_host.kind.name == "llvm":
            create_llvm_crt_metadata_module = tvm._ffi.get_global_func(
                "runtime.CreateLLVMCrtMetadataModule")
            to_return = create_llvm_crt_metadata_module([rt_mod_host],
                                                        target_host, runtime)
    else:
        to_return = rt_mod_host

    return OperatorModule.from_module(to_return,
                                      ir_module_by_target=annotated_mods,
                                      name=name)
Example #29
def build(ir_mod,
          target=None,
          target_host=None,
          params=None,
          mod_name="default"):
    # fmt: off
    # pylint: disable=line-too-long
    """Helper function that builds a Relay function to run on TVM graph executor.

    Parameters
    ----------
    ir_mod : :py:class:`~tvm.IRModule`
        The IR module to build. Using relay.Function is deprecated.

    target : str, :any:`tvm.target.Target`, or dict of str(i.e. device/context name) to str/tvm.target.Target, optional
        For heterogeneous compilation, it is a dictionary indicating context to
        target mapping. For homogeneous compilation, it is a build target.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        to setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    mod_name: Optional[str]
        The module name we will build

    Returns
    -------
    factory_module : tvm.relay.backend.executor_factory.ExecutorFactoryModule
            The runtime factory for the TVM graph executor.
    """
    # pylint: enable=line-too-long
    # fmt: on

    if not isinstance(ir_mod, (IRModule, _function.Function)):
        raise ValueError("Type of input parameter mod must be tvm.IRModule")

    if isinstance(ir_mod, _function.Function):
        if params:
            ir_mod = bind_params_by_name(ir_mod, params)
        ir_mod = IRModule.from_expr(ir_mod)
        warnings.warn(
            "Please use input parameter mod (tvm.IRModule) "
            "instead of deprecated parameter mod (tvm.relay.function.Function)",
            DeprecationWarning,
        )
    target = build_target_by_device_type_map(target)
    if isinstance(target_host, (str, Target)):
        target_host = Target(target_host)
    elif target_host:
        raise ValueError("target host must be the type of str, " +
                         "tvm.target.Target, or None")

    target, target_host = Target.check_and_update_host_consist(
        target, target_host, target_is_dict_key=False)

    # Retrieve the executor from the target
    executor = get_executor_from_target(target, target_host)

    # If current dispatch context is fallback context (the default root context),
    # then load pre-tuned parameters from TopHub
    if isinstance(autotvm.DispatchContext.current, autotvm.FallbackContext):
        tophub_context = autotvm.tophub.context(list(target.values()))
    else:
        tophub_context = autotvm.utils.EmptyContext()

    with tophub_context:
        bld_mod = BuildModule()
        executor_config, runtime_mod, params = bld_mod.build(mod=ir_mod,
                                                             target=target,
                                                             params=params,
                                                             executor=executor,
                                                             mod_name=mod_name)
        func_metadata = bld_mod.get_function_metadata()

        if executor == "aot":
            executor_factory = _executor_factory.AOTExecutorFactoryModule(
                ir_mod, target, runtime_mod, mod_name, params, func_metadata)
        elif executor == "graph":
            executor_factory = _executor_factory.GraphExecutorFactoryModule(
                ir_mod, target, executor_config, runtime_mod, mod_name, params,
                func_metadata)
        else:
            assert False, "Executor " + executor + " not supported"

        return executor_factory
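A hedged usage sketch for the graph-executor build above, again carrying the host inside the Target; the example workload is an assumption.

import tvm
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)

factory = build(mod, target=tvm.target.Target("cuda", host="llvm"), params=params)
# The returned factory bundles graph, module and params; it can be exported,
# e.g. factory.export_library("net.so"), and loaded by the graph executor.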
Example #30
def tune_model(
    tvmc_model: TVMCModel,
    target: str,
    tuning_records: Optional[str] = None,
    prior_records: Optional[str] = None,
    enable_autoscheduler: bool = False,
    rpc_key: Optional[str] = None,
    hostname: Optional[str] = None,
    port: Optional[Union[int, str]] = 9090,
    trials: int = 10000,
    target_host: Optional[str] = None,
    tuner: str = "xgb",
    min_repeat_ms: Optional[int] = None,
    early_stopping: Optional[int] = None,
    desired_layout: Optional[str] = None,
    timeout: int = 10,
    repeat: int = 1,
    number: int = 10,
    parallel: int = 4,
    hardware_params: Optional[HardwareParams] = None,
    include_simple_tasks: bool = False,
    log_estimated_latency: bool = False,
    additional_target_options: Optional[Dict[str, Dict[str, Any]]] = None,
):
    """Use tuning to automatically optimize the functions in a model.

    Parameters
    ----------
    tvmc_model : TVMCModel
        The model to be optimized.
    target : str
        Compilation target as plain string, inline JSON or path to a JSON file.
    tuning_records: str, optional
        The path to a file that tuning results will be saved to. If not specified,
        a temporary file will be used.
    prior_records: str, optional
        A path to previous tuning results that will be used to hot-start the tuning
        cost model if provided.
    enable_autoscheduler : bool, optional
        When true, use autoscheduling rather than autotvm. This should produce
        faster kernels for compatible model-target pairs.
    rpc_key : str, optional
        The RPC tracker key of the target device. Required when rpc_tracker is provided.
    hostname : str, optional
        The IP address of an RPC tracker, used when benchmarking remotely.
    port : int or str, optional
        The port of the RPC tracker to connect to. Defaults to 9090.
    trials : int, optional
        The number of schedules to try out for the entire model. Note that the default
        value is chosen as a decent average for most models, but larger models may need
        more trials to reach a good result while smaller models will converge with fewer
        trials.
    tuner : str, optional
        The type of tuner to use when tuning with autotvm. Can be one of
        "ga", "gridsearch", "random", "xgb", "xgb_knob", and "xgb-rank".
    min_repeat_ms : int, optional
        Minimum time to run each trial. Defaults to 0 on x86 and 1000 on other targets.
    early_stopping : int, optional
        When specified, stop tuning after this number of trials if results aren't improving.
    desired_layout : str, optional
        Can be one of "NCHW" or "NHWC". When specified, compatible operations in the graph
        will have their layout set to this format. Tasks will then be tuned using this
        specified layout.
    timeout : int, optional
        If a kernel trial lasts longer than this duration in seconds, it will be
        considered a failure.
    repeat : int, optional
        How many times each measurement should be repeated.
    number : int, optional
        The number of runs a single repeat is made of.
    parallel : int, optional
        The maximum number of parallel devices to use when tuning.
    hardware_params : auto_scheduler.HardwareParams, optional
        When using the autoscheduler, this object defines the configuration of the target hardware.
    include_simple_tasks : bool, optional
        Whether to extract simple operations or only computationally intensive ones when using
        the autoscheduler.
    log_estimated_latency : bool, optional
        If using the autoscheduler, write the estimated latency at each step of tuning to file.
    additional_target_options: Optional[Dict[str, Dict[str, Any]]]
        Additional target options in a dictionary to combine with initial Target arguments

    Returns
    -------
    tuning_records : str
        The path to the produced tuning log file.
    """
    target, extra_targets = common.target_from_cli(target,
                                                   additional_target_options)
    target, target_host = Target.check_and_update_host_consist(
        target, target_host)
    # TODO(jwfromm) Remove this deepcopy once AlterOpLayout bug that mutates source
    # model is fixed. For now, creating a clone avoids the issue.
    mod = deepcopy(tvmc_model.mod)
    params = tvmc_model.params
    if tuning_records is None:
        tuning_records = tvmc_model.default_tuning_records_path()

    for codegen_from_cli in extra_targets:
        codegen = composite_target.get_codegen_by_target(
            codegen_from_cli["name"])
        partition_function = codegen["pass_pipeline"]
        mod = partition_function(mod, params, **codegen_from_cli["opts"])

    # min_repeat_ms should be:
    # a. the value provided by the user, if any, or
    # b. 0ms in case target is "cpu"; otherwise 1000ms
    if min_repeat_ms is None:
        min_repeat_ms = 0 if target.keys[0] == "cpu" else 1000
        logger.info("Default --min-repeat-ms for this target is %s",
                    min_repeat_ms)

    if rpc_key:
        if hostname is None or port is None:
            raise common.TVMCException(
                "You must provide a hostname and port to connect to a remote RPC device."
            )
        if isinstance(port, str):
            port = int(port)

        logger.info("Tuning will be performed on device %s at %s:%d.", rpc_key,
                    hostname, port)

        runner_ctor = auto_scheduler.RPCRunner if enable_autoscheduler else autotvm.RPCRunner
        runner = runner_ctor(
            key=rpc_key,
            host=hostname,
            port=port,
            number=number,
            repeat=repeat,
            n_parallel=parallel,
            timeout=timeout,
            min_repeat_ms=min_repeat_ms,
        )
    else:
        logger.info("Starting localhost tuning.")
        runner_ctor = (auto_scheduler.LocalRPCMeasureContext
                       if enable_autoscheduler else autotvm.LocalRunner)
        local_server = runner_ctor(
            number=number,
            repeat=repeat,
            timeout=timeout,
            min_repeat_ms=min_repeat_ms,
        )

        # For autoscheduling on some devices, we need to maintain a LocalRPCMeasureContext object.
        if enable_autoscheduler:
            runner = local_server.runner
        else:
            runner = local_server

    if enable_autoscheduler:

        tasks, weights = autoscheduler_get_tuning_tasks(
            mod=mod,
            params=params,
            target=target,
            alter_layout=desired_layout,
            hardware_params=hardware_params,
            include_simple_tasks=include_simple_tasks,
        )

        # Create the autoscheduler tuning options
        tuning_options = auto_scheduler.TuningOptions(
            num_measure_trials=trials,
            measure_callbacks=[auto_scheduler.RecordToFile(tuning_records)],
            runner=runner,
            early_stopping=early_stopping,
        )

        logger.info("Autoscheduling with configuration: %s", tuning_options)

        # Schedule the tasks (i.e., produce a schedule for each task)
        schedule_tasks(tasks, weights, tuning_options, prior_records,
                       log_estimated_latency)
    else:
        tasks = autotvm_get_tuning_tasks(
            mod=mod,
            params=params,
            target=target,
            alter_layout=desired_layout,
        )

        # In autotvm, trials is specified per task. We can convert the per-model input
        # provided to per-task trials by dividing by the number of tasks.
        trials = int(trials / len(tasks))
        logger.info("Autotuning with %d trials per task.", trials)

        tuning_options = {
            "tuner":
            tuner,
            "trials":
            trials,
            "early_stopping":
            early_stopping,
            "measure_option":
            autotvm.measure_option(
                builder=autotvm.LocalBuilder(build_func="default"),
                runner=runner),
            "tuning_records":
            prior_records,
        }
        logger.info("Autotuning with configuration: %s", tuning_options)

        tune_tasks(tasks, tuning_records, **tuning_options)

    return tuning_records
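A hedged end-to-end sketch of calling tune_model; the model path and the loader entry point are assumptions (the exact TVMC loader location varies across TVM versions).

from tvm.driver.tvmc import frontends

# Hypothetical model file; load_model is assumed to return a TVMCModel here.
tvmc_model = frontends.load_model("resnet50-v2-7.onnx")

log_file = tune_model(
    tvmc_model,
    target="llvm",
    trials=2000,                 # smaller budget than the 10000 default
    enable_autoscheduler=False,  # plain AutoTVM tuning
)
print("tuning records written to", log_file)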