def warmup_estimate(ins, outs):
    """Run `func` once to warm up, then time a second run on the GPU.

    Relies on `runtime`, `ctx`, `func`, and `visible_dev_id` from the
    enclosing scope.
    """
    ins = [runtime.array(x, ctx) for x in ins]
    outs = [runtime.array(x, ctx) for x in outs]
    tensors = ins + outs
    # Warm-up run: triggers lazy initialization (e.g. memory allocation).
    func(*tensors)
    runtime.gpu(visible_dev_id).sync()
    # Timed run, closed by a device sync so the wall clock covers the kernel.
    t_start = time.time()
    func(*tensors)
    runtime.gpu(visible_dev_id).sync()
    t_diff = time.time() - t_start
    return ins, outs, tensors, t_diff
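# Usage sketch for warmup_estimate. It closes over `runtime`, `ctx`, `func`,
# `visible_dev_id`, and `time`, so a caller has to provide them; everything
# below is an illustrative assumption (here `runtime` is taken to be
# tvm.runtime.ndarray, whose array()/gpu() match the calls above; gpu() is
# a deprecated alias for cuda() in newer TVM).
import time
import numpy as np
import tvm
from tvm import te
from tvm.runtime import ndarray as runtime

visible_dev_id = 0
ctx = runtime.gpu(visible_dev_id)

n = 1024
A = te.placeholder((n,), name="A")
B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
s = te.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=64)
s[B].bind(bx, te.thread_axis("blockIdx.x"))
s[B].bind(tx, te.thread_axis("threadIdx.x"))
func = tvm.build(s, [A, B], target="cuda")

ins = [np.random.uniform(size=(n,)).astype("float32")]
outs = [np.zeros((n,), dtype="float32")]
_, _, _, seconds = warmup_estimate(ins, outs)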
def __init__(self, arg1, ctx=None, shape=None):
    """Construct a sparse matrix in CSR format.

    Parameters
    ----------
    arg1 : numpy.ndarray or a tuple of (data, indices, indptr)
        The corresponding dense numpy array,
        or a tuple for constructing a sparse matrix directly.

    ctx : TVMContext
        The corresponding context.

    shape : tuple of int
        The shape of the array
    """
    if isinstance(arg1, tuple):
        assert len(arg1) == 3
        self.data, self.indices, self.indptr = arg1
        self.shape = shape
    elif isinstance(arg1, _np.ndarray):
        source_array = arg1
        ridx, cidx = _np.nonzero(source_array)
        data = source_array[ridx, cidx]
        self.data = _nd.array(data, ctx)
        # Column indices of the nonzeros; cidx was already computed above.
        indices = cidx.astype(itype)
        self.indices = _nd.array(indices, ctx)
        # indptr[i] is the cumulative nonzero count of the first i rows.
        indptr = [0] + _np.apply_along_axis(
            _np.count_nonzero, axis=1, arr=source_array
        ).tolist()
        indptr = _np.cumsum(_np.array(indptr, itype)).astype(itype)
        self.indptr = _nd.array(indptr, ctx)
        self.shape = source_array.shape
    else:
        raise RuntimeError(
            "Construct CSRNDArray with either a tuple (data, indices, indptr) "
            "or a numpy.ndarray, cannot handle type %s." % (type(arg1),)
        )
    self.stype = "csr"
    self.dtype = self.data.dtype
    assert self.shape is not None
    assert isinstance(self.data, _nd.NDArray)
    assert isinstance(self.indices, _nd.NDArray)
    assert str(self.indices.dtype) in ("int32", "int64"), str(self.indices.dtype)
    assert isinstance(self.indptr, _nd.NDArray)
    assert str(self.indptr.dtype) in ("int32", "int64"), str(self.indptr.dtype)
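# Usage sketch for the CSR constructor above, assuming it is
# CSRNDArray.__init__ from tvm.contrib.sparse (where `itype` is a
# module-level index dtype such as "int32"). The keyword name of the
# context/device argument varies across TVM versions, so positional
# arguments are used here.
import numpy as np
import tvm
import tvm.contrib.sparse as tvmsp

dev = tvm.cpu(0)
dense = np.array([[0.0, 1.0], [2.0, 0.0]], dtype="float32")

# Path 1: construct directly from a dense numpy array.
sp = tvmsp.array(dense, dev)
assert sp.stype == "csr" and sp.shape == (2, 2)

# Path 2: construct from a precomputed (data, indices, indptr) tuple;
# `shape` is then required because it cannot be inferred.
sp2 = tvmsp.array((sp.data, sp.indices, sp.indptr), dev, dense.shape)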
def run_module_via_rpc(
    rpc_config: "RPCConfig",
    lib: "Module",
    dev_type: str,
    args: Dict[str, "np.ndarray"],
    continuation: Callable,
):
    """Execute a tvm.runtime.Module on an RPC remote."""
    # pylint: disable=import-outside-toplevel
    import os
    import tempfile

    from tvm.contrib.tar import tar
    from tvm.runtime import ndarray

    # pylint: enable=import-outside-toplevel
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "tvm_tmp_mod." + tar.output_format)
        lib.export_library(filename, tar)
        session = rpc_config.connect_server()
        session.upload(filename)
        _, filename = os.path.split(filename)
        rt_mod = session.load_module(filename)
        dev = session.device(dev_type=dev_type, dev_id=0)
        nd_args = {}
        for arg_key, arg_value in args.items():
            nd_args[arg_key] = ndarray.array(arg_value, dev)
        return continuation(rt_mod, dev, nd_args)
def run_module_via_rpc(
    rpc_config: "RPCConfig",
    lib: Union["Module", "Executable"],
    dev_type: str,
    args: Dict[str, "np.ndarray"],
    continuation: Callable,
    backend: Optional[str] = "graph",
):
    """Execute a tvm.runtime.Module on an RPC remote."""
    # pylint: disable=import-outside-toplevel
    import os
    import tempfile

    from tvm.contrib.tar import tar
    from tvm.runtime import ndarray

    # pylint: enable=import-outside-toplevel
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "tvm_tmp_mod." + tar.output_format)
        if backend == "vm":
            code, lib = lib.save()
        lib.export_library(filename, tar)
        session = rpc_config.connect_server()
        session.upload(filename)
        _, filename = os.path.split(filename)
        rt_mod = session.load_module(filename)
        if backend == "vm":
            rt_mod = session.get_function("runtime.Load_Executable")(code, rt_mod)
        dev = session.device(dev_type=dev_type, dev_id=0)
        nd_args = {k: ndarray.array(v, dev) for k, v in args.items()}
        return continuation(rt_mod, dev, nd_args)
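# Hedged usage sketch for run_module_via_rpc: the continuation receives the
# loaded remote module, the remote device, and the uploaded ndarray args, and
# whatever it returns is passed straight back to the caller. The RPCConfig
# import path and field names follow tvm.meta_schedule conventions but are
# assumptions here, as is the tiny relay network.
import numpy as np
import tvm
from tvm import relay
from tvm.meta_schedule.runner import RPCConfig

x = relay.var("data", shape=(1, 3), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], x + relay.const(1.0)))
lib = relay.build(mod, target="llvm")  # a Module suitable for the "graph" backend

def continuation(rt_mod, dev, nd_args):
    from tvm.contrib.graph_executor import GraphModule

    g_mod = GraphModule(rt_mod["default"](dev))
    g_mod.set_input(**nd_args)
    g_mod.run()
    return g_mod.get_output(0).numpy()

rpc_config = RPCConfig(
    tracker_host="127.0.0.1",
    tracker_port=9190,
    tracker_key="local",
    session_priority=1,
    session_timeout_sec=60,
)
out = run_module_via_rpc(rpc_config, lib, "cpu", {"data": np.ones((1, 3), "float32")}, continuation)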
def extract_from(
    self, tune_context: TuneContext, candidates: List[MeasureCandidate]
) -> List[NDArray]:
    # Mock extractor: returns one random feature matrix per candidate, with
    # a block count drawn in [1, max_block_num], while threading the RNG
    # state through self.random_state so results are reproducible.
    np.random.set_state(self.random_state)
    result = [
        np.random.rand(np.random.randint(1, self.max_block_num + 1), self.feature_size)
        for _ in candidates
    ]
    self.random_state = np.random.get_state()
    return [array(x) for x in result]
def test_sparse_array_tuple():
    dtype, itype = "float32", "int32"
    stype = "csr"
    target = "llvm"
    dev = tvm.device(target, 0)
    nr, nc, n = te.size_var("nr"), te.size_var("nc"), te.size_var("n")
    A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name="A", dtype=dtype)
    assert A.stype == "csr"
    C = te.compute(A.data.shape, lambda i: A.data[i] * 2.0, tag="cs_scatter")
    s = te.create_schedule(C.op)
    _nr, _nc = 3, 5
    a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype) - 0.6, 0.0)
    # convert to sparse array tuple
    source_array = a
    ridx, cidx = np.nonzero(source_array)
    data = source_array[ridx, cidx]
    a_data = _nd.array(data, dev)
    indices = np.nonzero(source_array)[1].astype(itype)
    a_indices = _nd.array(indices, dev)
    indptr = [0] + np.apply_along_axis(np.count_nonzero, axis=1, arr=source_array).tolist()
    indptr = np.cumsum(np.array(indptr, itype)).astype(itype)
    a_indptr = _nd.array(indptr, dev)
    a_init = (a_data, a_indices, a_indptr)
    # construct tvm sparse array with tuple
    a = tvmsp.array(a_init, shape=source_array.shape, device=dev)
    assert a.data.dtype == a.dtype
    Ab = namedtuple("CSRBuffer", ["data", "indices", "indptr"])
    Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_data")
    Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name="A_indices")
    binds = {A.data: Ab.data, A.indices: Ab.indices}
    f = tvm.build(s, [nr, A.data, C], target, binds=binds)
    c = tvmsp.array(np.zeros((_nr, _nc), dtype), dev)
    c.data = tvm.nd.empty(a.data.shape, dtype)
    c.indices = a.indices
    c.indptr = a.indptr
    f(a.data.shape[0], a.data, c.data)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2.0, rtol=1e-5)
def test_sparse_array_tuple():
    dtype, itype = 'float32', 'int32'
    stype = 'csr'
    target = 'llvm'
    ctx = tvm.context(target, 0)
    nr, nc, n = te.size_var('nr'), te.size_var('nc'), te.size_var('n')
    A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype)
    assert A.stype == 'csr'
    C = te.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
    s = te.create_schedule(C.op)
    _nr, _nc = 3, 5
    a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype) - .6, 0.)
    # convert to sparse array tuple
    source_array = a
    ridx, cidx = np.nonzero(source_array)
    data = source_array[ridx, cidx]
    a_data = _nd.array(data, ctx)
    indices = np.nonzero(source_array)[1].astype(itype)
    a_indices = _nd.array(indices, ctx)
    indptr = [0] + np.apply_along_axis(np.count_nonzero, axis=1, arr=source_array).tolist()
    indptr = np.cumsum(np.array(indptr, itype)).astype(itype)
    a_indptr = _nd.array(indptr, ctx)
    a_init = (a_data, a_indices, a_indptr)
    # construct tvm sparse array with tuple
    a = tvmsp.array(a_init, shape=source_array.shape, ctx=ctx)
    assert a.data.dtype == a.dtype
    Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr'])
    Ab.data = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
    Ab.indices = tvm.tir.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
    binds = {A.data: Ab.data, A.indices: Ab.indices}
    f = tvm.build(s, [nr, A.data, C], target, binds=binds)
    c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx)
    c.data = tvm.nd.empty(a.data.shape, dtype)
    c.indices = a.indices
    c.indptr = a.indptr
    f(a.data.shape[0], a.data, c.data)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)
def __init__(self, graph_json_str, libmod, libmod_name, params):
    assert isinstance(graph_json_str, string_types)
    fcreate = get_global_func("tvm.graph_runtime_factory.create")
    # Flatten the params dict into alternating (key, NDArray) varargs,
    # the layout expected by the factory creator on the C++ side.
    args = []
    for k, v in params.items():
        args.append(k)
        args.append(ndarray.array(v))
    self.module = fcreate(graph_json_str, libmod, libmod_name, *args)
    self.graph_json = graph_json_str
    self.lib = libmod
    self.libmod_name = libmod_name
    self.params = params
    self.iter_cnt = 0
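# Sketch of how this factory is normally obtained and consumed: relay.build
# invokes a constructor like the one above indirectly and returns the factory
# module (newer TVM renames "graph_runtime" to "graph_executor"). The toy
# network below is illustrative.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

x = relay.var("x", shape=(1, 8), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], x * relay.const(2.0)))
factory = relay.build(mod, target="llvm")

dev = tvm.cpu(0)
g_mod = graph_executor.GraphModule(factory["default"](dev))
g_mod.set_input("x", np.ones((1, 8), dtype="float32"))
g_mod.run()
print(g_mod.get_output(0).numpy())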
def _run_debug(self):
    """Execute the graph node by node.

    Each node's debug output is copied into the buffer, and the time
    consumed by each execution is recorded as debug output.
    """
    self.debug_datum._time_list = [[float(t)] for t in self.run_individual(10, 1, 1)]
    for i, node in enumerate(self.debug_datum.get_graph_nodes()):
        num_outputs = self.debug_datum.get_graph_node_output_num(node)
        for j in range(num_outputs):
            out_tensor = self._get_output_by_layer(i, j)
            out_tensor = array(out_tensor)
            self.debug_datum._output_tensor_list.append(out_tensor)
def set_params(self, params):
    """Set constant parameters for the model.

    Parameters
    ----------
    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.
    """
    inputs = {}
    for name, param in params.items():
        if isinstance(param, np.ndarray):
            param = _nd.array(param)
        inputs[name] = _expr.const(param)
    self._set_params_func(inputs)
def _try_load_buffer_from_file(buffer_name):
    """Try to load a buffer from a numpy file; return None if not found.

    The file name has the same format as in `_save_buffer_to_file`.
    """
    filelist = os.listdir()
    for file in filelist:
        if file.startswith(buffer_name + "."):
            # File name encodes the shape and dtype: "<name>.<d0>_<d1>_..._<dtype>.npy"
            meta_info = file.split(".")[-2].split("_")
            shape = [int(i) for i in meta_info[:-1]]
            dtype = meta_info[-1]
            buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
            buffer_data = buffer_data.reshape(shape)
            return ndarray.array(buffer_data)
    return None
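# Companion sketch, reconstructed from the parsing logic above (it is an
# assumption, not the original _save_buffer_to_file): the loader expects
# files named "<buffer_name>.<d0>_<d1>_..._<dtype>.npy" containing
# whitespace-separated text written by numpy.tofile.
import numpy as np

def _save_buffer_to_file(buffer_name, buffer_data):
    # buffer_data is assumed to be a numpy array; for a tvm.nd.NDArray,
    # convert with .asnumpy() first.
    shape_tag = "_".join(str(dim) for dim in buffer_data.shape)
    file_name = "%s.%s_%s.npy" % (buffer_name, shape_tag, buffer_data.dtype)
    buffer_data.tofile(file_name, sep=" ")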
def const(value, dtype=None):
    """Create a constant value.

    Parameters
    ----------
    value: Union[bool, int, float, numpy.ndarray, tvm.nd.NDArray]
        The constant value.

    dtype: str, optional
        The data type of the resulting constant.

    Note
    ----
    When dtype is None, we use the following rule:

    - int maps to "int32"
    - float maps to "float32"
    - bool maps to "bool"
    - otherwise, the same default rule as numpy applies.
    """
    if isinstance(value, (_base.numeric_types, (bool, list))):
        value = _np.array(value, dtype=dtype)
        if not dtype:
            # when dtype is None: int maps to "int32", float maps to "float32"
            dtype = {
                _np.dtype("int64"): _np.int32,
                _np.dtype("float64"): _np.float32,
            }.get(value.dtype, None)
    if isinstance(value, (_np.ndarray, _np.generic)):
        if dtype is not None:
            value = value.astype(dtype)
        value = _nd.array(value)
    if not isinstance(value, _nd.NDArray):
        raise ValueError("value has to be scalar or NDArray")
    return Constant(value)
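# Usage sketch for const() (assuming it is tvm.relay.const): scalars are
# routed through numpy first, so the narrowing table above applies before
# the NDArray is created.
import numpy as np
from tvm import relay

c0 = relay.const(1)      # Python int   -> "int32"
c1 = relay.const(1.0)    # Python float -> "float32"
c2 = relay.const(True)   # Python bool  -> "bool"
c3 = relay.const(np.float64(1.0), dtype="float16")  # explicit dtype wins
assert c0.data.dtype == "int32"
assert c1.data.dtype == "float32"
assert c3.data.dtype == "float16"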
def _timed_eval_func(
    inp_serialized,
    build_res,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    # pylint: disable=import-outside-toplevel
    from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency

    inp = MeasureInput.deserialize(inp_serialized)
    task_input_names = inp.task.task_input_names
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"

            tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {}
            args = []
            task_inputs_count = 0
            for arg in build_res.args:
                if arg in tensor_input_map:
                    tensor_name = tensor_input_map[arg]
                    if tensor_name in task_input_names:
                        args.append(
                            ndarray.array(
                                get_task_input_buffer(inp.task.workload_key, tensor_name), dev
                            )
                        )
                        task_inputs_count += 1
                    else:
                        raise ValueError(
                            "%s not found in task_inputs, " % (tensor_name)
                            + "should provide with `SearchTask(..., task_inputs={...})`"
                        )
                else:
                    empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
                    random_fill(empty_array)
                    args.append(empty_array)
            if task_inputs_count != len(task_input_names):
                logger.warning(
                    "task_inputs not fully matched, check if there's any unexpected error"
                )
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
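# The core measurement idiom used above, in isolation: build a module, wrap
# its entry function in a time_evaluator, and read per-repeat means from
# `.results`. The workload and target here are illustrative only.
import numpy as np
import tvm
from tvm import te

n = 256
A = te.placeholder((n,), name="A")
B = te.compute((n,), lambda i: A[i] * 2.0, name="B")
func = tvm.build(te.create_schedule(B.op), [A, B], target="llvm")

dev = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(n,)).astype("float32"), dev)
b = tvm.nd.empty((n,), "float32", dev)
time_f = func.time_evaluator(func.entry_name, dev, number=3, repeat=2, min_repeat_ms=10)
costs = time_f(a, b).results  # tuple of mean seconds, one entry per repeat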
def _timed_eval_func(
    inp_serialized,
    build_res,
    args,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
            assert len(args) == len(build_res.args)
            # pylint: disable=consider-using-enumerate
            for idx in range(len(args)):
                if args[idx] is None:
                    build_res_arg = build_res.args[idx]
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    args[idx] = empty_array
                else:
                    args[idx] = ndarray.array(args[idx], dev)
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def _rpc_run(
    inp_serialized,
    build_res,
    args,
    key,
    host,
    port,
    priority,
    timeout,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        # upload built module
        remote = request_remote(key, host, port, priority, timeout)
        remote.upload(build_res.filename)
        func = remote.load_module(os.path.split(build_res.filename)[1])
        dev = remote.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            stream = dev.create_raw_stream()
            dev.set_raw_stream(stream)
            random_fill = remote.get_function("tvm.contrib.random.random_fill")
            assert (
                random_fill
            ), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
            assert len(args) == len(build_res.args)
            # pylint: disable=consider-using-enumerate
            for idx in range(len(args)):
                if args[idx] is None:
                    build_res_arg = build_res.args[idx]
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    args[idx] = empty_array
                else:
                    args[idx] = ndarray.array(args[idx], dev)
            dev.sync()

            # First run to check that the kernel is correct
            func.entry_func(*args)
            dev.sync()

            costs = time_f(*args).results

            # clean up remote files
            remote.remove(build_res.filename)
            remote.remove(os.path.splitext(build_res.filename)[0] + ".so")
            remote.remove("")
            dev.free_raw_stream(stream)
        # pylint: disable=broad-except
        except Exception:
            dev.free_raw_stream(stream)
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error
    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def extract_from(
    self,
    context: TuneContext,  # pylint: disable = unused-argument
    candidates: List[MeasureCandidate],  # pylint: disable = unused-argument
) -> List[np.ndarray]:
    # Stub extractor: ignores its inputs and returns a single fixed-size
    # random feature matrix.
    return [array(np.random.rand(4, 5))]
def extract_task_from_relay(
    mod: Union[IRModule, RelayFunc],
    target: Target,
    params: Optional[Dict[str, NDArray]] = None,
    *,
    opt_level: int = 3,
    pass_config: Optional[Dict[str, Any]] = None,
    disabled_pass: Optional[List[str]] = None,
) -> List[ExtractedTask]:
    """Extract tuning tasks from a relay program.

    Parameters
    ----------
    mod : Union[tvm.IRModule, tvm.relay.Function]
        The module or function to tune
    target : tvm.target.Target
        The compilation target
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        The associated parameters of the program
    opt_level : int
        The optimization level of the compiler
    pass_config : Optional[Dict[str, Any]]
        The pass config of the compiler
    disabled_pass : Optional[List[str]]
        The list of disabled passes of the compiler

    Returns
    -------
    tasks: List[ExtractedTask]
        The tasks extracted from this network
    """
    extract_task_func = get_global_func("relay.backend.MetaScheduleExtractTask")
    assert extract_task_func

    # Normalize the target once up front (the original converted str targets
    # here and re-checked later; the two checks are consolidated).
    if not isinstance(target, Target):
        target = Target(target)

    # Guard against params=None, which the signature allows.
    relay_params = {}
    for name, param in (params or {}).items():
        if isinstance(param, np.ndarray):
            param = nd.array(param)
        relay_params[name] = param

    if disabled_pass is None:
        disabled_pass = []
    if pass_config is None:
        pass_config = {"relay.backend.use_meta_schedule": True}

    if isinstance(mod, RelayFunc):
        mod = IRModule.from_expr(mod)

    with target, transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass=disabled_pass,
    ):
        tasks = extract_task_func(mod, target, relay_params)
    # Tasks are extracted via post order visit, return the reversed list.
    return list(reversed(tasks))
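# Usage sketch for extract_task_from_relay; the conv workload is
# illustrative, and the ExtractedTask field read at the end (task_name) is
# taken from tvm.meta_schedule conventions but should be treated as an
# assumption here.
import numpy as np
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 3, 32, 32), dtype="float32")
weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="float32")
out = relay.nn.conv2d(data, weight, padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
params = {"weight": np.random.uniform(size=(16, 3, 3, 3)).astype("float32")}

tasks = extract_task_from_relay(mod, target="llvm", params=params)
for task in tasks:
    print(task.task_name)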