def maybe_warn(target, func_name):
    """Warn when the thrust setting of the target and the TVM build disagree."""
    if get_global_func(func_name, allow_missing=True) and "thrust" not in target.libs:
        logging.warning("TVM is built with thrust but thrust is not used.")
    if "thrust" in target.libs and get_global_func(func_name, allow_missing=True) is None:
        logging.warning("thrust is requested but TVM is not built with thrust.")
def load_library(config_file_name):
    """Import files to create a pipeline executor.

    Parameters
    ----------
    config_file_name : str
        Path and name of the configuration file. The configuration file
        contains the disk paths of the parameter file, library file,
        and JSON file.
    """
    with open(config_file_name, "r") as file_handle:
        config = file_handle.read()
    config = json.loads(config)
    if "load_config" not in config or "pipeline_config" not in config:
        raise RuntimeError(
            '"load_config" or "pipeline_config" is missing in %s' % config_file_name
        )

    # The config file used to load the library, parameter, and JSON files.
    with open(config["load_config"], "r") as file_handle:
        load_config = file_handle.read()

    # The config file used to load the pipeline compute config.
    with open(config["pipeline_config"], "r") as file_handle:
        pipeline_config = file_handle.read()

    # Load a PipelineExecutor from the disk files.
    load_library = get_global_func("tvm.pipeline_executor.load", allow_missing=False)
    module = load_library(load_config, pipeline_config)

    return PipelineModule(module)
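
# Hedged usage sketch for load_library: "pipeline_config.json" is a placeholder for the
# configuration file produced when a pipeline executor was exported to disk; the returned
# PipelineModule can then be fed inputs and run like any other executor module.
def _example_load_pipeline_executor():
    pipeline_module = load_library("pipeline_config.json")
    return pipeline_module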
def get_global_func_with_default_on_worker(
    name: Union[None, str, Callable],
    default: Callable,
) -> Callable:
    """Get the registered global function on the worker process.

    Parameters
    ----------
    name : Union[None, str, Callable]
        If given a string, retrieve the function from TVM's global registry;
        if given a Python function, return it as it is;
        otherwise, return `default`.

    default : Callable
        The function to be returned if `name` is None.

    Returns
    -------
    result : Callable
        The retrieved global function, or `default` if `name` is None.
    """
    if name is None:
        return default
    if callable(name):
        return name
    try:
        return get_global_func(name)
    except TVMError as error:
        raise ValueError(
            f"Function '{name}' is not registered on the worker process. "
            "The build function and export function should be registered in the worker process. "
            "Note that the worker process is only aware of functions registered in the TVM package; "
            "if there are extra functions to be registered, "
            "please send the registration logic via initializer."
        ) from error
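
# Hedged usage sketch for get_global_func_with_default_on_worker. The registry name
# "my_pkg.custom_build" is hypothetical; on a real worker it must have been registered
# (e.g. via tvm.register_func) before it can be looked up by string.
def _example_resolve_worker_func():
    def _default_build(*args, **kwargs):
        raise NotImplementedError("placeholder default")

    # None falls back to the provided default.
    assert get_global_func_with_default_on_worker(None, _default_build) is _default_build
    # A callable is returned unchanged.
    assert get_global_func_with_default_on_worker(_default_build, None) is _default_build
    # A string is looked up in TVM's global registry; an unregistered name raises ValueError.
    try:
        get_global_func_with_default_on_worker("my_pkg.custom_build", _default_build)
    except ValueError:
        pass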
def mlas_packb(B, K, N, transb=True):
    r"""Pre-pack the B matrix if it is constant, for mlas_matmul, C = A * B^T.

    Parameters
    ----------
    B : tvm.relay.Expr
        The second input of mlas_matmul.
    K : int
        The number of columns of A.
    N : int
        The number of columns of the output C.
    transb : bool
        Whether the B matrix is transposed.

    Returns
    -------
    result: tvm.relay.Expr
        The pre-packed B matrix.
    """
    get_packb_size = _ffi.get_global_func("tvm.contrib.mlas.gemm_packb_size")
    packb_size = get_packb_size(N, K)
    # Only the 4-byte float32 datatype is supported.
    arr_size = int(packb_size / 4)
    return _make.mlas_packb(B, K, N, arr_size, transb)
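
# Hedged usage sketch for mlas_packb: the shapes are made up, and the packed result is
# assumed to be fed to the companion mlas_matmul op mentioned in the docstring above.
def _example_mlas_packb():
    import numpy as np
    from tvm import relay

    K, N = 64, 128
    # With transb=True the constant B is laid out as (N, K).
    B = relay.const(np.random.rand(N, K).astype("float32"))
    # Pre-pack the constant B once so repeated matmuls reuse the packed layout.
    packed_B = mlas_packb(B, K, N)
    return packed_B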
def pipeline_executor_enabled():
    """Check if the pipeline executor is enabled.

    Returns
    -------
    enable : bool
        Whether the pipeline executor is enabled.
    """
    return get_global_func("tvm.pipeline_executor.create", allow_missing=True) is not None
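
# Hedged usage sketch: guard pipeline-executor code paths (for instance in tests) on the
# runtime actually being built with pipeline executor support (USE_PIPELINE_EXECUTOR is
# assumed to be the relevant build flag).
def _example_guard_pipeline_executor():
    if not pipeline_executor_enabled():
        print("Skipping: this TVM build does not enable the pipeline executor.")
        return
    # ... safe to construct and run a pipeline executor here ...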
def _get_sort_func(mode=0):
    """Get the sort function for argwhere. Mode 0 uses topk; other modes use argsort."""
    if get_global_func("tvm.contrib.thrust.sort", allow_missing=True):
        ret = topk_thrust if mode == 0 else argsort_thrust
    else:
        logger.warning(
            "It's highly recommended to enable the thrust library with set(USE_THRUST ON)"
            " when compiling argwhere for the cuda target. Otherwise, it can result in"
            " significant performance degradation or incorrect results."
        )
        ret = topk if mode == 0 else argsort
    return ret
def __init__(
    self,
    database: Database,
    te_filter_func: Union[str, None, Callable[[List[Tensor]], PrimFunc]] = None,
) -> None:
    if isinstance(te_filter_func, str):
        te_filter_func = get_global_func(te_filter_func)
    self.__init_handle_by_constructor__(
        _ffi_api.ApplyHistoryBest,  # type: ignore  # pylint: disable=no-member
        database,
        te_filter_func,
        make_logging_func(logger),
    )
def get_pipeline_executor_module(self):
    """Get the pipeline executor module.

    Returns
    -------
    module : Module
        Common interface for the pipeline executor factory Module.
    """
    if not self.module:
        graph_executors, config = self.graph_executor_create(
            self.pipeline_mods, self.mods_config
        )
        self.pipeline_create = get_global_func(
            "tvm.pipeline_executor.create", allow_missing=False
        )
        self.module = self.pipeline_create(graph_executors, config)
    return self.module
def shash2hex(mod: IRModule) -> str:
    """Get the structural hash of a module.

    Parameters
    ----------
    mod : IRModule
        The module to be hashed.

    Returns
    -------
    result : str
        The structural hash of the module.
    """
    func = get_global_func("meta_schedule._SHash2Hex")
    return str(func(mod))
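
# Hedged usage sketch for shash2hex: a trivial PrimFunc is created through TE and the
# containing IRModule is hashed; structurally identical modules should yield the same digest.
def _example_shash2hex():
    import tvm
    from tvm import te

    A = te.placeholder((16,), name="A", dtype="float32")
    B = te.compute((16,), lambda i: A[i] + 1.0, name="B")
    mod = tvm.IRModule({"main": te.create_prim_func([A, B])})
    print(shash2hex(mod))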
def topk_strategy_cuda(attrs, inputs, out_type, target):
    """topk cuda strategy"""
    strategy = _op.OpStrategy()
    strategy.add_implementation(
        wrap_compute_topk(topi.cuda.topk),
        wrap_topi_schedule(topi.cuda.schedule_topk),
        name="topk.cuda",
    )
    if target.kind.name == "cuda" and get_global_func(
        "tvm.contrib.thrust.sort", allow_missing=True
    ):
        strategy.add_implementation(
            wrap_compute_topk(topi.cuda.topk_thrust),
            wrap_topi_schedule(topi.cuda.schedule_topk),
            name="topk_thrust.cuda",
            plevel=15,
        )
    return strategy
def scatter_cuda(attrs, inputs, out_type, target):
    """scatter cuda strategy"""
    strategy = _op.OpStrategy()
    strategy.add_implementation(
        wrap_compute_scatter(topi.cuda.scatter),
        wrap_topi_schedule(topi.cuda.schedule_scatter),
        name="scatter.cuda",
        plevel=10,
    )
    rank = len(inputs[0].shape)
    with SpecializedCondition(rank == 1):
        if target.kind.name == "cuda" and get_global_func(
            "tvm.contrib.thrust.stable_sort_by_key", allow_missing=True
        ):
            strategy.add_implementation(
                wrap_compute_scatter(topi.cuda.scatter_via_sort),
                wrap_topi_schedule(topi.cuda.schedule_scatter_via_sort),
                name="scatter_via_sort.cuda",
                plevel=9,  # use the sequential version by default
            )
    return strategy
def can_use_rocthrust(target, func_name):
    """Check whether the rocThrust-backed function can be used for the given rocm target."""
    maybe_warn(target, func_name)
    return (
        target.kind.name == "rocm"
        and "thrust" in target.libs
        and get_global_func(func_name, allow_missing=True)
    )
def extract_task_from_relay(
    mod: Union[IRModule, RelayFunc],
    target: Target,
    params: Optional[Dict[str, NDArray]] = None,
    *,
    opt_level: int = 3,
    pass_config: Optional[Dict[str, Any]] = None,
    disabled_pass: Optional[List[str]] = None,
) -> List[ExtractedTask]:
    """Extract tuning tasks from a relay program.

    Parameters
    ----------
    mod : Union[tvm.IRModule, tvm.relay.Function]
        The module or function to tune
    target : tvm.target.Target
        The compilation target
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        The associated parameters of the program
    opt_level : int
        The optimization level of the compiler
    pass_config : Optional[Dict[str, Any]]
        The pass config of the compiler
    disabled_pass : Optional[List[str]]
        The list of disabled passes of the compiler

    Returns
    -------
    tasks: List[ExtractedTask]
        The tasks extracted from this network
    """
    extract_task_func = get_global_func("relay.backend.MetaScheduleExtractTask")
    assert extract_task_func

    target = Target(target) if isinstance(target, str) else target

    # Convert parameters to tvm.nd.NDArray; guard against `params` being None.
    if params is None:
        params = {}
    relay_params = {}
    for name, param in params.items():
        if isinstance(param, np.ndarray):
            param = nd.array(param)
        relay_params[name] = param

    if disabled_pass is None:
        disabled_pass = []
    if pass_config is None:
        pass_config = {"relay.backend.use_meta_schedule": True}

    if isinstance(mod, RelayFunc):
        mod = IRModule.from_expr(mod)
    if not isinstance(target, Target):
        target = Target(target)

    with target, transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass=disabled_pass,
    ):
        tasks = extract_task_func(mod, target, relay_params)
    # Tasks are extracted via post order visit, return the reversed list.
    return list(reversed(tasks))
def extract_task_from_relay(
    mod: IRModule,
    target: Target,
    params: Optional[Dict[str, NDArray]] = None,
    *,
    opt_level: int = 3,
    pass_config: Optional[Dict[str, Any]] = None,
    disabled_pass: Optional[List[str]] = None,
) -> List[ExtractedTask]:
    """Extract tuning tasks from a relay program.

    Parameters
    ----------
    mod : IRModule
        The module or function to tune
    target : tvm.target.Target
        The compilation target
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        The associated parameters of the program
    opt_level : int
        The optimization level of the compiler
    pass_config : Optional[Dict[str, Any]]
        The pass config of the compiler
    disabled_pass : Optional[List[str]]
        The list of disabled passes of the compiler

    Returns
    -------
    tasks: List[ExtractedTask]
        The tasks extracted from this network
    """
    # pylint: disable=import-outside-toplevel
    from tvm.relay import Function as RelayFunc

    # pylint: enable=import-outside-toplevel
    extract_task_func = get_global_func(
        "relay.backend.MetaScheduleExtractTask",
        allow_missing=False,
    )
    if isinstance(mod, RelayFunc):
        mod = IRModule.from_expr(mod)
    if not isinstance(target, Target):
        target = Target(target)
    if disabled_pass is None:
        disabled_pass = []
    if pass_config is None:
        pass_config = {"relay.backend.use_meta_schedule": True}
    if params is None:
        params = {}
    relay_params = {}
    for name, param in params.items():
        if isinstance(param, np.ndarray):
            param = nd.array(param)
        relay_params[name] = param
    with autotvm_silencer(), target, transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass=disabled_pass,
    ):
        return list(extract_task_func(mod, target, relay_params))
def schedule_lrn(attrs, outs, target):
    """Schedule LRN op"""
    with target:
        return topi.generic.schedule_lrn(outs)


# bitpack
@generic_func
def schedule_bitpack(attrs, outs, target):
    """Schedule bitpack"""
    with target:
        return topi.generic.schedule_bitpack(outs)


get_auto_scheduler_rewritten_layout = _ffi.get_global_func(
    "relay.attrs.get_auto_scheduler_rewritten_layout"
)


# conv2d
def wrap_compute_conv2d(
    topi_compute,
    need_data_layout=False,
    need_out_layout=False,
    has_groups=False,
    need_auto_scheduler_layout=False,
):
    """Wrap conv2d topi compute"""

    def _compute_conv2d(attrs, inputs, out_type):
        padding = get_const_tuple(attrs.padding)
        strides = get_const_tuple(attrs.strides)
def extract_task_from_relay(
    mod: IRModule,
    target: Target,
    params: Optional[Dict[str, NDArray]] = None,
    *,
    opt_level: int = 3,
    pass_config: Optional[Dict[str, Any]] = None,
    disabled_pass: Optional[List[str]] = None,
    te_filter_func: Union[str, None, Callable[[List[Tensor]], PrimFunc]] = None,
) -> List[ExtractedTask]:
    """Extract tuning tasks from a relay program.

    Parameters
    ----------
    mod : IRModule
        The module or function to tune
    target : tvm.target.Target
        The compilation target
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        The associated parameters of the program
    opt_level : int
        The optimization level of the compiler
    pass_config : Optional[Dict[str, Any]]
        The pass config of the compiler
    disabled_pass : Optional[List[str]]
        The list of disabled passes of the compiler
    te_filter_func : Callable[[List[tvm.te.Tensor]], bool]
        The filter function to filter out the extracted tasks.
        If it's a string, it's the name of the filtering function. Built-in functions are
          - "meta_schedule.DefaultTaskFilter"
          - "meta_schedule.DefaultTaskFilterAllowExtern"
        If it's None, it's the default filtering function.
        If it's a callable, it's the filtering function.

    Returns
    -------
    tasks: List[ExtractedTask]
        The tasks extracted from this network
    """
    # pylint: disable=import-outside-toplevel
    from tvm import autotvm
    from tvm.relay import Function as RelayFunc

    # pylint: enable=import-outside-toplevel
    if isinstance(te_filter_func, str):
        te_filter_func = get_global_func(te_filter_func)
    extract_task_func = get_global_func(
        "relay.backend.MetaScheduleExtractTask",
        allow_missing=False,
    )
    if isinstance(mod, RelayFunc):
        mod = IRModule.from_expr(mod)
    if not isinstance(target, Target):
        target = Target(target)
    if disabled_pass is None:
        disabled_pass = []
    if pass_config is None:
        pass_config = {"relay.backend.use_meta_schedule": True}
    if params is None:
        params = {}
    relay_params = {}
    for name, param in params.items():
        if isinstance(param, np.ndarray):
            param = nd.array(param)
        relay_params[name] = param
    with target, autotvm_silencer(), transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass=disabled_pass,
    ):
        # Load the pre-tuned TOPHub context on non-cuda targets when only the fallback
        # AutoTVM dispatch context is active; otherwise use an empty context.
        if target.kind.name != "cuda" and isinstance(
            autotvm.DispatchContext.current, autotvm.FallbackContext
        ):
            tophub_context = autotvm.tophub.context(target)
        else:
            tophub_context = autotvm.utils.EmptyContext()
        with tophub_context:
            return list(extract_task_func(mod, target, relay_params, te_filter_func))
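
# Hedged usage sketch for extract_task_from_relay: a minimal conv2d network stands in for
# a real model; for an imported model, `mod` and `params` would come from a relay frontend.
def _example_extract_tasks():
    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
    weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="float32")
    out = relay.nn.conv2d(data, weight, padding=(1, 1))
    mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))

    tasks = extract_task_from_relay(mod, target="llvm --num-cores=4", params={})
    for task in tasks:
        print(task.task_name)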
def is_thrust_available():
    """Test if thrust-based sorting ops are available."""
    return get_global_func("tvm.contrib.thrust.sort", allow_missing=True) is not None
def can_use_thrust(target, func_name):
    """Check whether the Thrust-backed function can be used for the given cuda/nvptx target."""
    maybe_warn(target, func_name)
    return (
        target.kind.name in ["cuda", "nvptx"]
        and "thrust" in target.libs
        and get_global_func(func_name, allow_missing=True)
    )
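
# Hedged usage sketch: choose between a Thrust-backed and a native sort implementation.
# The thrust path only triggers when the target was created with -libs=thrust and the
# TVM runtime was built with USE_THRUST.
def _example_pick_sort_impl():
    import tvm

    target = tvm.target.Target("cuda -libs=thrust")
    if can_use_thrust(target, "tvm.contrib.thrust.sort"):
        return "thrust-backed sort"
    return "native TIR sort"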
def reset_cpu_affinity(affinity):
    """Restore the CPU affinity to the given default value."""
    # Reset the affinity of TVM's runtime thread pool.
    config_threadpool = get_global_func("runtime.config_threadpool")
    config_threadpool(-2, 0)
    # Restore the affinity of the current process.
    os.sched_setaffinity(0, affinity)
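
# Hedged usage sketch for reset_cpu_affinity: capture the current affinity, pin the
# process while measuring, then restore both the process affinity and TVM's thread pool.
def _example_restore_affinity():
    original_affinity = os.sched_getaffinity(0)
    try:
        os.sched_setaffinity(0, {0})  # e.g. pin to CPU 0 for a measurement
        # ... run the measurement here ...
    finally:
        reset_cpu_affinity(original_affinity)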
def optimize_torch(
    func,
    example_inputs,
    tuning_config=None,
    target=None,
    work_dir=None,
):
    """Load a PyTorch model that can be traced by TorchScript, then optimize it via MetaSchedule.

    Parameters
    ----------
    func : callable or torch.nn.Module
        A Python function or nn.Module that can be run by TorchScript's trace
        (i.e. torch.jit.trace(model, input)).
    example_inputs : tuple or torch.Tensor
        Inputs to `torch.jit.trace`.
    tuning_config : tvm.meta_schedule.TuneConfig
        The configuration for tuning by MetaSchedule.
        If the user doesn't set the config, the tuning will run with a default setting.
        Here, the total number of trials is proportional to the number of tunable tasks
        in the input module.
    target : Optional[Union[str, Target]]
        The target of the compilation.
        If the user doesn't set the target, the module will be built for the CPU target.
    work_dir : Optional[str]
        The working directory to save intermediate results.

    Returns
    -------
    mod : GraphExecutorFactoryWrapper
        It will return an object of GraphExecutorFactoryWrapper,
        which is a subclass of the original nn.Module.
    """
    if target is None:
        target = llvm_target()

    if tuning_config is None:
        warning_msg = (
            "Using the default tuning parameters.",
            "The default number of trials is set to a small value to let tuning finish quickly.",
            "For optimal performance, it is recommended to provide",
            "the `tuning_config` argument with a bigger number of trials.",
        )
        warnings.warn(" ".join(warning_msg), stacklevel=2)

    # If `func` is already a traced module, this statement has no effect.
    jit_mod = torch.jit.trace(func, example_inputs)

    if isinstance(example_inputs, torch.Tensor):
        example_inputs = [example_inputs]

    shape_list = [(f"inp_{idx}", i.shape) for idx, i in enumerate(example_inputs)]
    mod, params = relay.frontend.from_pytorch(jit_mod, shape_list)  # IRModule

    if work_dir:
        context_manager = contextlib.nullcontext(work_dir)
    else:
        context_manager = tempfile.TemporaryDirectory()

    with context_manager as work_dir_path:
        executor_factory = tune_relay_auto(
            mod=mod, params=params, config=tuning_config, target=target, work_dir=work_dir_path
        )

        save_runtime_mod = get_global_func("tvmtorch.save_runtime_mod")
        save_runtime_mod(executor_factory.module)

        return GraphExecutorFactoryWrapper(torch.classes.tvm_torch.GraphExecutorFactoryWrapper())
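
# Hedged usage sketch for optimize_torch: a tiny torch module is traced and tuned with the
# default settings; calling the returned wrapper like an nn.Module is assumed to run the
# compiled graph executor under the hood.
def _example_optimize_torch():
    import torch

    class TinyModel(torch.nn.Module):
        def forward(self, x):
            return torch.nn.functional.relu(x + 1.0)

    example_input = torch.rand(8, 8)
    optimized = optimize_torch(TinyModel().eval(), example_input)
    print(optimized(example_input))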