Example #1
0
    def warmup_estimate(ins, outs):
        """Upload the buffers, run ``func`` once untimed, then time a second run.

        Parameters
        ----------
        ins : list
            Input buffers; each is converted with ``runtime.array(x, ctx)``.
        outs : list
            Output buffers, converted the same way.

        Returns
        -------
        tuple
            ``(ins, outs, tensors, t_diff)``: the converted inputs, the
            converted outputs, their concatenation ``ins + outs``, and the
            elapsed seconds of the second ``func`` call including the
            device sync that follows it.
        """
        ins = [runtime.array(x, ctx) for x in ins]
        outs = [runtime.array(x, ctx) for x in outs]

        tensors = ins + outs
        # First call is not part of the measurement.
        func(*tensors)
        runtime.gpu(visible_dev_id).sync()

        # perf_counter() is monotonic and high resolution; time.time() can
        # jump when the system clock is adjusted and skew the measurement.
        t_start = time.perf_counter()
        func(*tensors)
        runtime.gpu(visible_dev_id).sync()
        t_diff = time.perf_counter() - t_start
        return ins, outs, tensors, t_diff
Example #2
0
    def __init__(self, arg1, ctx=None, shape=None):
        """Construct a sparse matrix in CSR format.

        Parameters
        ----------
        arg1 : numpy.ndarray or a tuple with (data, indices, indptr)
            Either a dense 2-D numpy array to convert,
            or a tuple for constructing a sparse matrix directly.

        ctx: tvmContext
            The context the converted arrays are created on. Only used when
            ``arg1`` is a dense numpy array.

        shape : tuple of int
            The shape of the array. Used when ``arg1`` is a tuple; for a
            dense numpy array the array's own shape is taken instead.

        Raises
        ------
        RuntimeError
            If ``arg1`` is neither a tuple nor a numpy array.
        """
        if isinstance(arg1, tuple):
            assert len(arg1) == 3
            self.data, self.indices, self.indptr = arg1
            self.shape = shape
        elif isinstance(arg1, _np.ndarray):
            source_array = arg1
            # A single nonzero() pass gives both the values' coordinates and
            # the column indices; no need to scan the array a second time.
            ridx, cidx = _np.nonzero(source_array)
            self.data = _nd.array(source_array[ridx, cidx], ctx)
            self.indices = _nd.array(cidx.astype(itype), ctx)
            # indptr[i] is the number of non-zeros in rows [0, i).
            # count_nonzero(axis=1) also works for an array with zero rows,
            # where apply_along_axis would raise.
            row_counts = _np.count_nonzero(source_array, axis=1)
            indptr = _np.zeros(len(row_counts) + 1, dtype=itype)
            indptr[1:] = _np.cumsum(row_counts)
            self.indptr = _nd.array(indptr, ctx)
            self.shape = source_array.shape
        else:
            raise RuntimeError(
                "Construct CSRNDArray with either a tuple (data, indices, indptr) "
                "or a numpy.array, can't handle type %s." % (type(arg1),)
            )
        self.stype = "csr"
        self.dtype = self.data.dtype
        # Sanity checks on the final state, whichever branch produced it.
        assert self.shape is not None
        assert isinstance(self.data, _nd.NDArray)
        assert isinstance(self.indices, _nd.NDArray)
        assert str(self.indices.dtype) == "int32" or str(self.indices.dtype) == "int64", str(
            self.indices.dtype
        )
        assert isinstance(self.indptr, _nd.NDArray)
        assert str(self.indptr.dtype) == "int32" or str(self.indptr.dtype) == "int64", str(
            self.indptr.dtype
        )
def run_module_via_rpc(
    rpc_config: "RPCConfig",
    lib: "Module",
    dev_type: str,
    args: Dict[str, "np.ndarray"],
    continuation: Callable,
):
    """Export ``lib``, load it through an RPC session and hand it to ``continuation``.

    Parameters
    ----------
    rpc_config : RPCConfig
        Object whose ``connect_server()`` opens the RPC session.
    lib : Module
        Module exported with ``export_library`` into a temporary directory
        and then uploaded.
    dev_type : str
        Device type passed to ``session.device``; device id 0 is always used.
    args : Dict[str, np.ndarray]
        Named inputs; every value is converted with ``ndarray.array`` on the
        session device.
    continuation : Callable
        Invoked as ``continuation(rt_mod, dev, nd_args)``.

    Returns
    -------
    The value returned by ``continuation``.
    """
    # pylint: disable=import-outside-toplevel
    import os
    import tempfile

    from tvm.contrib.tar import tar
    from tvm.runtime import ndarray

    # pylint: enable=import-outside-toplevel

    with tempfile.TemporaryDirectory() as work_dir:
        local_path = os.path.join(work_dir, "tvm_tmp_mod." + tar.output_format)
        lib.export_library(local_path, tar)
        session = rpc_config.connect_server()
        session.upload(local_path)
        # load_module is given only the base name of the uploaded file.
        rt_mod = session.load_module(os.path.basename(local_path))
        dev = session.device(dev_type=dev_type, dev_id=0)
        nd_args = {name: ndarray.array(value, dev) for name, value in args.items()}
        return continuation(rt_mod, dev, nd_args)
Example #4
0
def run_module_via_rpc(
    rpc_config: "RPCConfig",
    lib: Union["Module", "Executable"],
    dev_type: str,
    args: Dict[str, "np.ndarray"],
    continuation: Callable,
    backend: Optional[str] = "graph",
):
    """Export ``lib``, load it through an RPC session and hand it to ``continuation``.

    Parameters
    ----------
    rpc_config : RPCConfig
        Object whose ``connect_server()`` opens the RPC session.
    lib : Union[Module, Executable]
        What to run. When ``backend == "vm"`` it is first split with
        ``lib.save()`` into ``(code, lib)`` and only the library part is
        exported and uploaded.
    dev_type : str
        Device type passed to ``session.device``; device id 0 is always used.
    args : Dict[str, np.ndarray]
        Named inputs; every value is converted with ``ndarray.array`` on the
        session device.
    continuation : Callable
        Invoked as ``continuation(rt_mod, dev, nd_args)``.
    backend : Optional[str]
        ``"vm"`` enables the executable path described above; any other
        value uses the loaded module as is.

    Returns
    -------
    The value returned by ``continuation``.
    """
    # pylint: disable=import-outside-toplevel
    import os
    import tempfile

    from tvm.contrib.tar import tar
    from tvm.runtime import ndarray

    # pylint: enable=import-outside-toplevel

    uses_vm = backend == "vm"
    with tempfile.TemporaryDirectory() as work_dir:
        local_path = os.path.join(work_dir, "tvm_tmp_mod." + tar.output_format)
        if uses_vm:
            code, lib = lib.save()
        lib.export_library(local_path, tar)
        session = rpc_config.connect_server()
        session.upload(local_path)
        # load_module is given only the base name of the uploaded file.
        rt_mod = session.load_module(os.path.basename(local_path))
        if uses_vm:
            # Recombine the saved code with the module loaded on the session.
            load_executable = session.get_function("runtime.Load_Executable")
            rt_mod = load_executable(code, rt_mod)
        dev = session.device(dev_type=dev_type, dev_id=0)
        nd_args = {}
        for name, value in args.items():
            nd_args[name] = ndarray.array(value, dev)
        return continuation(rt_mod, dev, nd_args)
Example #5
0
 def extract_from(self, tune_context: TuneContext,
                  candidates: List[MeasureCandidate]) -> List[NDArray]:
     """Generate one random feature matrix per candidate.

     The global numpy random state is replaced by ``self.random_state``
     before drawing, and the state reached afterwards is stored back into
     ``self.random_state``. Each matrix has a random row count in
     ``[1, self.max_block_num]`` and ``self.feature_size`` columns; the
     candidates themselves are not inspected.
     """
     np.random.set_state(self.random_state)
     features = []
     for _ in candidates:
         num_rows = np.random.randint(1, self.max_block_num + 1)
         features.append(np.random.rand(num_rows, self.feature_size))
     self.random_state = np.random.get_state()
     return [array(feature) for feature in features]
Example #6
0
def test_sparse_array_tuple():
    """Build a CSR array from a (data, indices, indptr) tuple and check a x2 kernel."""
    dtype, itype = "float32", "int32"
    stype = "csr"
    target = "llvm"
    dev = tvm.device(target, 0)

    # Symbolic placeholder and the compute that doubles the stored values.
    nr = te.size_var("nr")
    nc = te.size_var("nc")
    n = te.size_var("n")
    A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name="A", dtype=dtype)
    assert A.stype == stype
    C = te.compute(A.data.shape, lambda i: A.data[i] * 2.0, tag="cs_scatter")
    s = te.create_schedule(C.op)

    # Random dense input; subtracting 0.6 and clamping at 0 leaves many zeros.
    _nr, _nc = 3, 5
    dense = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype) - 0.6, 0.0)

    # convert to sparse array tuple
    row_idx, col_idx = np.nonzero(dense)
    a_data = _nd.array(dense[row_idx, col_idx], dev)
    a_indices = _nd.array(col_idx.astype(itype), dev)
    row_counts = np.apply_along_axis(np.count_nonzero, axis=1, arr=dense).tolist()
    indptr = np.cumsum(np.array([0] + row_counts, itype)).astype(itype)
    a_indptr = _nd.array(indptr, dev)

    # construct tvm sparse array with tuple
    a = tvmsp.array((a_data, a_indices, a_indptr), shape=dense.shape, device=dev)
    assert a.data.dtype == a.dtype

    Ab = namedtuple("CSRBuffer", ["data", "indices", "indptr"])
    Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_data")
    # NOTE(review): the indices buffer is declared with the data array's
    # shape and dtype — confirm this is intended.
    Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_indices")
    binds = {A.data: Ab.data, A.indices: Ab.indices}
    f = tvm.build(s, [nr, A.data, C], target, binds=binds)

    # Output reuses the input's sparsity structure; only the data is new.
    c = tvmsp.array(np.zeros((_nr, _nc), dtype), dev)
    c.data = tvm.nd.empty(a.data.shape, dtype)
    c.indices = a.indices
    c.indptr = a.indptr
    f(a.data.shape[0], a.data, c.data)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2.0, rtol=1e-5)
Example #7
0
def test_sparse_array_tuple():
    """Build a CSR array from a (data, indices, indptr) tuple and check a x2 kernel."""
    dtype, itype = "float32", "int32"
    stype = "csr"
    target = "llvm"
    ctx = tvm.context(target, 0)

    # Symbolic placeholder and the compute that doubles the stored values.
    nr = te.size_var("nr")
    nc = te.size_var("nc")
    n = te.size_var("n")
    A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name="A", dtype=dtype)
    assert A.stype == stype
    C = te.compute(A.data.shape, lambda i: A.data[i] * 2.0, tag="cs_scatter")
    s = te.create_schedule(C.op)

    # Random dense input; subtracting 0.6 and clamping at 0 leaves many zeros.
    _nr, _nc = 3, 5
    dense = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype) - 0.6, 0.0)

    # convert to sparse array tuple
    row_idx, col_idx = np.nonzero(dense)
    a_data = _nd.array(dense[row_idx, col_idx], ctx)
    a_indices = _nd.array(col_idx.astype(itype), ctx)
    row_counts = np.apply_along_axis(np.count_nonzero, axis=1, arr=dense).tolist()
    indptr = np.cumsum(np.array([0] + row_counts, itype)).astype(itype)
    a_indptr = _nd.array(indptr, ctx)

    # construct tvm sparse array with tuple
    a = tvmsp.array((a_data, a_indices, a_indptr), shape=dense.shape, ctx=ctx)
    assert a.data.dtype == a.dtype

    Ab = namedtuple("CSRBuffer", ["data", "indices", "indptr"])
    Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_data")
    # NOTE(review): the indices buffer is declared with the data array's
    # shape and dtype — confirm this is intended.
    Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_indices")
    binds = {A.data: Ab.data, A.indices: Ab.indices}
    f = tvm.build(s, [nr, A.data, C], target, binds=binds)

    # Output reuses the input's sparsity structure; only the data is new.
    c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx)
    c.data = tvm.nd.empty(a.data.shape, dtype)
    c.indices = a.indices
    c.indptr = a.indptr
    f(a.data.shape[0], a.data, c.data)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2.0, rtol=1e-5)
Example #8
0
 def __init__(self, graph_json_str, libmod, libmod_name, params):
     """Create the factory module and remember the pieces it was built from.

     Parameters
     ----------
     graph_json_str : str
         Graph description; must be an instance of ``string_types``.
     libmod
         Library module handed to the create function.
     libmod_name : str
         Name passed along with ``libmod``.
     params : dict
         Mapping from parameter name to value; each value is converted
         with ``ndarray.array`` before being passed on.
     """
     assert isinstance(graph_json_str, string_types)
     fcreate = get_global_func("tvm.graph_runtime_factory.create")
     # The create function takes params flattened as name, array, name, array, ...
     flat_params = []
     for name, value in params.items():
         flat_params.extend((name, ndarray.array(value)))
     self.module = fcreate(graph_json_str, libmod, libmod_name, *flat_params)
     self.graph_json = graph_json_str
     self.lib = libmod
     self.libmod_name = libmod_name
     self.params = params
     self.iter_cnt = 0
Example #9
0
    def _run_debug(self):
        """Record timings and every node output in ``self.debug_datum``.

        The values returned by ``self.run_individual(10, 1, 1)`` are stored
        in ``debug_datum._time_list`` as one-element lists of floats. Then,
        for each graph node and each of its outputs, the tensor obtained
        from ``_get_output_by_layer`` is wrapped with ``array`` and appended
        to ``debug_datum._output_tensor_list``.
        """
        timings = self.run_individual(10, 1, 1)
        self.debug_datum._time_list = [[float(t)] for t in timings]
        for node_idx, node in enumerate(self.debug_datum.get_graph_nodes()):
            output_count = self.debug_datum.get_graph_node_output_num(node)
            for out_idx in range(output_count):
                layer_output = array(self._get_output_by_layer(node_idx, out_idx))
                self.debug_datum._output_tensor_list.append(layer_output)
Example #10
0
    def set_params(self, params):
        """Set constant parameters for the model.

        Parameters
        ----------
        params : dict of str to NDArray
            Input parameters to the graph that do not change
            during inference time. Used for constant folding.
            Values given as ``numpy.ndarray`` are converted with
            ``_nd.array`` first; every value is then wrapped in
            ``_expr.const``.
        """
        inputs = {
            name: _expr.const(_nd.array(param) if isinstance(param, np.ndarray) else param)
            for name, param in params.items()
        }
        self._set_params_func(inputs)
Example #11
0
def _try_load_buffer_from_file(buffer_name):
    """Try to load buffer from a numpy file, if not found, return None.

    File name has a same format as `_save_buffer_to_file`.
    """
    filelist = os.listdir()

    for file in filelist:
        if file.startswith(buffer_name + "."):
            meta_info = file.split(".")[-2].split("_")
            shape = [int(i) for i in meta_info[:-1]]
            dtype = meta_info[-1]
            buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
            buffer_data = buffer_data.reshape(shape)
            return ndarray.array(buffer_data)

    return None
Example #12
0
def const(value, dtype=None):
    """Create a constant value.

    Parameters
    ----------
    value: Union[bool, int, float, numpy.ndarray, tvm.nd.NDArray]
        The constant value.

    dtype: str, optional
        The data type of the resulting constant.

    Returns
    -------
    Constant
        The constant wrapping ``value`` as an NDArray.

    Raises
    ------
    ValueError
        If ``value`` cannot be turned into an NDArray.

    Note
    ----
    When dtype is None, we use the following rule:

    - int maps to "int32"
    - float maps to "float32"
    - bool maps to "bool"
    - other using the same default rule as numpy.
    """
    if isinstance(value, (_base.numeric_types, (bool, list))):
        value = _np.array(value, dtype=dtype)

    if not dtype:
        # when dtype is None: int maps to "int32", float maps to "float32".
        # getattr (rather than value.dtype) keeps unsupported inputs that have
        # no ``dtype`` attribute on the ValueError path below instead of
        # failing here with an AttributeError.
        dtype = {
            _np.dtype("int64"): _np.int32,
            _np.dtype("float64"): _np.float32,
        }.get(getattr(value, "dtype", None), None)

    if isinstance(value, (_np.ndarray, _np.generic)):
        if dtype is not None:
            value = value.astype(dtype)
        value = _nd.array(value)

    if not isinstance(value, _nd.NDArray):
        raise ValueError("value has to be scalar or NDArray")

    return Constant(value)
Example #13
0
def _timed_eval_func(
    inp_serialized,
    build_res,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Load a built module locally, prepare its arguments and time it.

    Arguments listed in the task's ``task_input_names`` are filled from
    ``get_task_input_buffer``; all others are allocated empty and filled by
    ``tvm.contrib.random.random_fill``. The directory containing
    ``build_res.filename`` is removed afterwards, and the function sleeps
    for ``cooldown_interval`` before returning.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)`` where
        ``all_cost`` is the elapsed time of this call plus
        ``build_res.time_cost`` and ``timestamp`` is taken right after the
        build directory is removed. On failure ``costs`` is ``(MAX_FLOAT,)``.
    """
    # pylint: disable=import-outside-toplevel
    from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency

    inp = MeasureInput.deserialize(inp_serialized)
    task_input_names = inp.task.task_input_names
    tic = time.time()
    error_no = 0
    error_msg = None

    # Stage 1: load the module and build the time evaluator.
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = ""
        if enable_cpu_cache_flush:
            f_prepare = "cache_flush_cpu_non_first_arg"
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    # Stage 2: only when stage 1 succeeded, prepare arguments and measure.
    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"

            if task_input_names:
                tensor_input_map = prepare_input_map(build_res.args)
            else:
                tensor_input_map = {}

            args = []
            task_inputs_count = 0
            for arg in build_res.args:
                if arg not in tensor_input_map:
                    # Not a named task input: use a randomly filled buffer.
                    random_buffer = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
                    random_fill(random_buffer)
                    args.append(random_buffer)
                    continue

                tensor_name = tensor_input_map[arg]
                if tensor_name not in task_input_names:
                    raise ValueError(
                        "%s not found in task_inputs, " % (tensor_name) +
                        "should provide with `SearchTask(..., task_inputs={...})`"
                    )
                task_buffer = get_task_input_buffer(inp.task.workload_key, tensor_name)
                args.append(ndarray.array(task_buffer, dev))
                task_inputs_count += 1

            if task_inputs_count != len(task_input_names):
                logger.warning(
                    "task_inputs not fully matched, check if there's any unexpected error"
                )
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        # "*E" marks a run error.
        marker = "*" if error_no == MeasureErrorNo.NO_ERROR else "*E"
        print(marker, end="", flush=True)
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
Example #14
0
def _timed_eval_func(
    inp_serialized,
    build_res,
    args,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Load a built module locally, materialize ``args`` on the device and time it.

    ``args`` must have one entry per ``build_res.args`` and is modified in
    place: ``None`` entries are replaced by buffers filled with
    ``tvm.contrib.random.random_fill``, the others are converted with
    ``ndarray.array``. The directory containing ``build_res.filename`` is
    removed afterwards, and the function sleeps for ``cooldown_interval``
    before returning.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)`` where
        ``all_cost`` is the elapsed time of this call plus
        ``build_res.time_cost`` and ``timestamp`` is taken right after the
        build directory is removed. On failure ``costs`` is ``(MAX_FLOAT,)``.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None

    # Stage 1: load the module and build the time evaluator.
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = ""
        if enable_cpu_cache_flush:
            f_prepare = "cache_flush_cpu_non_first_arg"
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    # Stage 2: only when stage 1 succeeded, prepare arguments and measure.
    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
            assert len(args) == len(build_res.args)
            for idx, given in enumerate(args):
                if given is not None:
                    args[idx] = ndarray.array(given, dev)
                    continue
                # No data supplied for this slot: use a randomly filled buffer.
                spec = build_res.args[idx]
                random_buffer = ndarray.empty(get_const_tuple(spec.shape), spec.dtype, dev)
                random_fill(random_buffer)
                args[idx] = random_buffer
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        # "*E" marks a run error.
        marker = "*" if error_no == MeasureErrorNo.NO_ERROR else "*E"
        print(marker, end="", flush=True)
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
Example #15
0
def _rpc_run(
    inp_serialized,
    build_res,
    args,
    key,
    host,
    port,
    priority,
    timeout,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Upload a built module to a remote device, run it once and time it.

    ``args`` must have one entry per ``build_res.args`` and is modified in
    place: ``None`` entries are replaced by buffers filled with the remote
    ``tvm.contrib.random.random_fill``, the others are converted with
    ``ndarray.array``. The local directory containing ``build_res.filename``
    is removed afterwards, and the function sleeps for ``cooldown_interval``
    before returning.

    Parameters
    ----------
    inp_serialized
        Serialized measure input; deserialized with ``MeasureInput.deserialize``.
    build_res
        Build result providing ``filename``, ``args`` and ``time_cost``.
    args : list
        Per-argument data, ``None`` for arguments to fill randomly.
    key, host, port, priority, timeout
        Forwarded to ``request_remote`` to obtain the remote session.
    number, repeat, min_repeat_ms
        Forwarded to ``time_evaluator``.
    cooldown_interval : float
        Seconds to sleep after the measurement.
    enable_cpu_cache_flush : bool
        When true, ``"cache_flush_cpu_non_first_arg"`` is used as ``f_preproc``.
    verbose : int
        When >= 1, prints ``*`` on success and ``*E`` on error.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, all_cost, timestamp)`` where
        ``all_cost`` is the elapsed time of this call plus
        ``build_res.time_cost``. On failure ``costs`` is ``(MAX_FLOAT,)``.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        # upload built module
        remote = request_remote(key, host, port, priority, timeout)
        remote.upload(build_res.filename)
        func = remote.load_module(os.path.split(build_res.filename)[1])
        dev = remote.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        # Stays None until the stream really exists, so cleanup never touches
        # an unbound name when create_raw_stream() itself fails.
        stream = None
        try:
            stream = dev.create_raw_stream()
            dev.set_raw_stream(stream)
            random_fill = remote.get_function("tvm.contrib.random.random_fill")
            assert (
                random_fill
            ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"

            assert len(args) == len(build_res.args)
            for idx, given in enumerate(args):
                if given is None:
                    build_res_arg = build_res.args[idx]
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    args[idx] = empty_array
                else:
                    args[idx] = ndarray.array(given, dev)
            dev.sync()

            # First run for check that the kernel is correct
            func.entry_func(*args)
            dev.sync()

            costs = time_f(*args).results

            # clean up remote files
            remote.remove(build_res.filename)
            remote.remove(os.path.splitext(build_res.filename)[0] + ".so")
            remote.remove("")
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()
        finally:
            # Single release point for both the success and the error path.
            if stream is not None:
                dev.free_raw_stream(stream)

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
 def extract_from(
     self,
     context: TuneContext,  # pylint: disable = unused-argument
     candidates: List[MeasureCandidate],  # pylint: disable = unused-argument
 ) -> List[np.ndarray]:
     """Return a one-element list holding a random 4x5 array; both arguments are ignored."""
     random_features = np.random.rand(4, 5)
     return [array(random_features)]
Example #17
0
def extract_task_from_relay(
    mod: Union[IRModule, RelayFunc],
    target: Target,
    params: Optional[Dict[str, NDArray]] = None,
    *,
    opt_level: int = 3,
    pass_config: Optional[Dict[str, Any]] = None,
    disabled_pass: Optional[List[str]] = None,
) -> List[ExtractedTask]:
    """Extract tuning tasks from a relay program.

    Parameters
    ----------
    mod : Union[tvm.IRModule, tvm.relay.Function]
        The module or function to tune
    target : tvm.target.Target
        The compilation target. Anything that is not already a ``Target``
        (e.g. a string) is converted with ``Target(target)``.
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        The associated parameters of the program. ``None`` is treated as no
        parameters; ``numpy.ndarray`` values are converted with ``nd.array``.
    opt_level : int
        The optimization level of the compiler
    pass_config : Optional[Dict[str, Any]]
        The pass config of the compiler. Defaults to
        ``{"relay.backend.use_meta_schedule": True}`` when ``None``.
    disabled_pass : Optional[List[str]]
        The list of disabled passes of the compiler

    Returns
    -------
    tasks: List[ExtractedTask]
        The tasks extracted from this network
    """

    extract_task_func = get_global_func("relay.backend.MetaScheduleExtractTask")
    assert extract_task_func

    # One conversion covers strings and any other non-Target input.
    if not isinstance(target, Target):
        target = Target(target)

    # ``params`` is optional: without this guard the default value of None
    # would fail on ``.items()``.
    if params is None:
        params = {}
    relay_params = {}
    for name, param in params.items():
        if isinstance(param, np.ndarray):
            param = nd.array(param)
        relay_params[name] = param

    if disabled_pass is None:
        disabled_pass = []
    if pass_config is None:
        pass_config = {"relay.backend.use_meta_schedule": True}

    if isinstance(mod, RelayFunc):
        mod = IRModule.from_expr(mod)

    with target, transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass=disabled_pass,
    ):
        tasks = extract_task_func(mod, target, relay_params)
        # Tasks are extracted via post order visit, return the reversed list.
        return list(reversed(tasks))