def __init__(
    self,
    timeout=10,
    n_parallel=None,
    build_kwargs=None,
    build_func="default",
    do_fork=False,
    runtime=None,
):
    """Set up the local builder.

    Parameters
    ----------
    timeout: float
        Forwarded to the base class and used as the timeout of the executor.
    n_parallel: int
        Forwarded to the base class. Must be None or 1 when ``do_fork`` is False.
    build_kwargs: dict
        Forwarded to the base class.
    build_func: callable or str
        "default" selects ``tar.tar``, "ndk" selects ``ndk.create_shared`` and
        "stackvm" selects ``stackvm.build``. A non-string value is used as given.
    do_fork: bool
        When False, ``n_parallel`` is asserted to be None or 1.
    runtime: Optional[Runtime]
        Passed to ``_WrappedBuildFunc`` together with the build function.

    Raises
    ------
    ValueError
        If ``build_func`` is a string that is not one of the known names.
    """
    super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

    if isinstance(build_func, str):
        if build_func == "default":
            build_func = tar.tar
        elif build_func == "ndk":
            build_func = ndk.create_shared
        elif build_func == "stackvm":
            build_func = stackvm.build
        else:
            # Keep a separator so the offending name is readable in the message.
            raise ValueError("Invalid build_func: " + build_func)
    self.build_func = _WrappedBuildFunc(build_func, runtime)

    if not do_fork:
        assert n_parallel in (
            None,
            1,
        ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"

    self.executor = PopenPoolExecutor(
        timeout=timeout,
        initializer=reset_global_scope,
        initargs=(AutotvmGlobalScope.current,),
    )
    self.tmp_dir = tempfile.mkdtemp()
def __init__(
    self,
    rpc_config: Optional[RPCConfig] = None,
    evaluator_config: Optional[EvaluatorConfig] = None,
    cooldown_sec: float = 0.0,
    alloc_repeat: int = 1,
    f_create_session: Union[T_CREATE_SESSION, str, None] = None,
    f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
    f_cleanup: Union[T_CLEANUP, str, None] = None,
    max_workers: Optional[int] = None,
    initializer: Optional[Callable[[], None]] = None,
) -> None:
    """Constructor

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds.
    alloc_repeat: int
        The number of times to random fill the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.
    max_workers: Optional[int] = None
        The maximum number of connections. Defaults to number of logical CPU cores.
    initializer: Optional[Callable[[], None]]
        The initializer function.
    """
    super().__init__()
    self.rpc_config = RPCConfig._normalized(rpc_config)
    self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
    self.cooldown_sec = cooldown_sec
    self.alloc_repeat = alloc_repeat
    self.f_create_session = f_create_session
    self.f_upload_module = f_upload_module
    self.f_alloc_argument = f_alloc_argument
    self.f_run_evaluator = f_run_evaluator
    self.f_cleanup = f_cleanup
    if max_workers is None:
        max_workers = cpu_count(logical=True)
    logger.info("RPCRunner: max_workers = %d", max_workers)
    self.pool = PopenPoolExecutor(
        max_workers=max_workers,
        # Read the timeout from the normalized config: the raw `rpc_config`
        # argument defaults to None, which has no `session_timeout_sec`.
        # NOTE(review): presumes `RPCConfig._normalized` returns a usable
        # config when given None -- confirm against its definition.
        timeout=self.rpc_config.session_timeout_sec,
        initializer=initializer,
    )
    self._sanity_check()
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
    """Build the given MeasureInputs into runnable modules (LocalBuilder backend).

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be built.
    timeout : int
        The timeout limit (in second) for each build; handed to the pool executor
        and recorded in the result of every build that did not complete.
    n_parallel : int
        Degree of parallelism handed to the pool executor.
    build_func : str = 'default'
        The name of the build function; must be equal to `BuildFunc.name`.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program building.

    Returns
    -------
    res : List[BuildResult]
        The build results of these MeasureInputs.

    Raises
    ------
    ValueError
        If a worker reports a status other than COMPLETE, TIMEOUT or EXCEPTION.
    """
    assert build_func == BuildFunc.name, (
        "BuildFunc.name: " + BuildFunc.name + ", but args is: " + build_func
    )

    pool = PopenPoolExecutor(
        n_parallel, timeout, reset_global_scope, (AutotvmGlobalScope.current,)
    )
    worker_args = [(inp.serialize(), BuildFunc.build_func, verbose) for inp in inputs]

    results = []
    for outcome in pool.map_with_error_catching(local_build_worker, worker_args):
        if outcome.status == StatusKind.COMPLETE:
            results.append(BuildResult(*outcome.value))
            continue

        if outcome.status == StatusKind.TIMEOUT:
            # Build timeout
            marker, error_no, error_msg = ".T", MeasureErrorNo.BUILD_TIMEOUT, None
        elif outcome.status == StatusKind.EXCEPTION:
            # Build error
            marker, error_no, error_msg = ".E", MeasureErrorNo.COMPILE_HOST, repr(outcome.value)
        else:
            raise ValueError("Result status is not expected. Unreachable branch")

        if verbose >= 1:
            print(marker, end="", flush=True)
        results.append(BuildResult(None, [], error_no, error_msg, timeout))

    return results
def test_popen_pool_executor_recycles():
    """A single-worker pool with maximum_process_uses=2 swaps its process on the third job."""
    pool = PopenPoolExecutor(max_workers=1, timeout=None, maximum_process_uses=2)

    first_pid = pool.submit(os.getpid).result()

    # Second job: same worker process as the first one.
    second_pid = pool.submit(os.getpid).result()
    assert first_pid == second_pid

    # Third job: the pid must differ from the first one.
    third_pid = pool.submit(os.getpid).result()
    assert first_pid != third_pid
def test_popen_pool_executor():
    """Exercise submit() outcomes (timeout, crash, value) and map_with_error_catching()."""
    import tvm

    short_timeout_pool = PopenPoolExecutor(max_workers=2, timeout=0.01)
    timed_out = short_timeout_pool.submit(identity_after, 1, 100)
    crashed = short_timeout_pool.submit(terminate_self)
    returned_three = short_timeout_pool.submit(identity_after, 3, 0)
    returned_string = short_timeout_pool.submit(tvm.runtime.String, "xyz")

    with pytest.raises(TimeoutError):
        timed_out.result()

    with pytest.raises(ChildProcessError):
        crashed.result()

    assert returned_three.result() == 3

    string_value = returned_string.result()
    assert isinstance(string_value, tvm.runtime.String)
    assert string_value == "xyz"

    # Without a timeout, mapping the identity over 0..99 gives each index back.
    unbounded_pool = PopenPoolExecutor(max_workers=4, timeout=None)
    outcomes = unbounded_pool.map_with_error_catching(lambda x: x, range(100))
    for position, outcome in enumerate(outcomes):
        assert outcome.value == position
def test_popen_pool_executor_timeout():
    """Check that a job submitted with `timeout_job` ends in a TimeoutError.

    The previous form only inspected the exception type when one happened to be
    raised, so a job that returned normally made the test pass without checking
    anything. A missing exception is now a failure.
    """
    timeout = 0.5

    pool = PopenPoolExecutor(timeout=timeout)

    f1 = pool.submit(timeout_job, timeout)
    # Poll `done()` until the future settles, so that method is exercised too.
    while not f1.done():
        pass

    try:
        f1.result()
    except Exception as ex:  # pylint: disable=broad-except
        assert isinstance(ex, TimeoutError)
    else:
        raise AssertionError("Expected the job to time out, but it returned a result")
def test_popen_pool_executor_async():
    """The fast job must report done() before the slow one; both return the same value."""
    pool = PopenPoolExecutor()
    slow_future = pool.submit(slow_summation, 9999999)
    fast_future = pool.submit(fast_summation, 9999999)

    # 0 means "completion not observed yet"; otherwise the time it was first observed.
    slow_done_at = 0
    fast_done_at = 0
    while slow_done_at == 0 or fast_done_at == 0:
        if slow_done_at == 0 and slow_future.done():
            slow_done_at = time.time()
        if fast_done_at == 0 and fast_future.done():
            fast_done_at = time.time()

    assert fast_done_at < slow_done_at, "Expected fast async job to finish first!"
    assert slow_future.result() == fast_future.result()
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
    """Build the given MeasureInputs into runnable modules (LocalBuilder backend).

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be built.
    timeout : int
        The timeout limit (in second) for each build; handed to the pool executor,
        forwarded to every worker and recorded in the result of a timed-out build.
    n_parallel : int
        Degree of parallelism handed to the pool executor.
    build_func : str = 'default'
        The name of build function to process the built module.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program building.

    Returns
    -------
    res : List[BuildResult]
        The build results of these MeasureInputs.
    """
    pool = PopenPoolExecutor(n_parallel, timeout)
    worker_args = [(inp.serialize(), build_func, timeout, verbose) for inp in inputs]

    results = []
    for outcome in pool.map_with_error_catching(local_build_worker, worker_args):
        if outcome.status == StatusKind.COMPLETE:
            results.append(BuildResult(*outcome.value))
            continue

        # Anything that is not COMPLETE is expected to be a build timeout.
        assert outcome.status == StatusKind.TIMEOUT
        if verbose >= 1:
            print(".T", end="", flush=True)
        results.append(BuildResult(None, [], MeasureErrorNo.BUILD_TIMEOUT, None, timeout))

    return results
def _reset_pool(self, space, target, task):
    """Reset the processing pool used for feature extraction.

    When an upper model is set, the reset is delegated to it (the base model
    reuses the upper model's pool). Otherwise the current pool is closed and a
    new one is created whose workers are initialized with (space, target, task).
    """
    if not self.upper_model:
        self._close_pool()
        self.pool = PopenPoolExecutor(
            max_workers=self.num_threads,
            initializer=_extract_popen_initializer,
            initargs=(space, target, task),
        )
        return

    self.upper_model._reset_pool(space, target, task)
def __init__(
    self,
    key,
    host,
    port,
    priority=1,
    timeout=10,
    n_parallel=None,
    number=4,
    repeat=3,
    min_repeat_ms=0,
    cooldown_interval=0.1,
    enable_cpu_cache_flush=False,
    module_loader=None,
):
    """Store the measurement settings and create the executor for RPC runs.

    ``timeout`` and ``n_parallel`` are forwarded to the base class; every
    argument except ``n_parallel`` is also stored on the instance under its
    own name. The executor's timeout is ``timeout * (self.n_parallel + 1)``.
    """
    super(RPCRunner, self).__init__(timeout, n_parallel)

    # Where to request the device from.
    self.key, self.host, self.port = key, host, port
    self.priority = priority
    self.timeout = timeout

    # How to time the generated code.
    self.number, self.repeat = number, repeat
    self.min_repeat_ms = min_repeat_ms
    self._ref_input = None

    self.enable_cpu_cache_flush = enable_cpu_cache_flush
    self.cooldown_interval = cooldown_interval
    self.module_loader = module_loader

    pool_timeout = timeout * (self.n_parallel + 1)
    self.executor = PopenPoolExecutor(
        timeout=pool_timeout,
        initializer=reset_global_scope,
        initargs=(AutotvmGlobalScope.current,),
    )
def pool_map(func, args, batch_size, verbose=False, pool=None):
    """Map `func` over `args` in small batches and join the results.

    A wrapper around a pool's `map` that supports small-batch mapping for a
    large argument list. This can reduce memory usage.

    Parameters
    ----------
    func: Func(arg) -> np.ndarray
        mapping function
    args: List
        list of arguments
    batch_size: int
        batch size in mapping
    verbose: bool, optional
        whether print progress
    pool: multiprocessing.Pool, optional
        pool object to use; it must provide `map`. When omitted, a
        PopenPoolExecutor is created for this call and closed before returning.

    Returns
    -------
    converted numpy array
        The per-batch arrays joined with `np.concatenate`, or None when `args`
        is empty.
    """
    tic = time.time()

    # Only a pool created here is ours to close; a caller's pool is left open.
    owns_pool = not pool
    local_pool = PopenPoolExecutor() if owns_pool else pool

    chunks = []
    try:
        if verbose:
            logger.info("mapping begin")
        for i in range(0, len(args), batch_size):
            if verbose:
                logger.info("mapping %d/%d elapsed %.2f", i, len(args), time.time() - tic)
            chunks.append(np.array(local_pool.map(func, args[i : i + batch_size])))
        if verbose:
            logger.info("mapping done")
    finally:
        # Close the locally created pool even when `func` or the mapping raises.
        if owns_pool:
            local_pool.close()

    if not chunks:
        return None
    if len(chunks) == 1:
        # A single batch is returned as is, without a concatenation pass.
        return chunks[0]
    # Join once at the end instead of re-copying the accumulated array per batch.
    return np.concatenate(chunks)
def rpc_runner_run(
    inputs,
    build_results,
    key,
    host,
    port,
    priority=1,
    n_parallel=1,
    timeout=10,
    number=3,
    repeat=1,
    min_repeat_ms=0,
    cooldown_interval=0.0,
    enable_cpu_cache_flush=False,
    verbose=1,
):
    """Run function of RPCRunner to test the performance of the input BuildResults.

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be measured.
    build_results : List[BuildResult]
        The BuildResults to be measured.
    key : str
        The key of the device registered in the RPC tracker.
    host : str
        The host address of the RPC Tracker.
    port : int
        The port of RPC Tracker.
    priority : int = 1
        The priority of this run request, larger is more prior.
    n_parallel : int = 1
        The number of tasks run in parallel.
    timeout : int = 10
        The timeout limit (in second) for each run. It is forwarded to every
        worker and added to the time cost recorded for a timed-out run.
    number : int = 3
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int = 1
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms : int = 0
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.
    cooldown_interval : float = 0.0
        The cool down interval between two measurements in seconds.
    enable_cpu_cache_flush: bool = False
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has an effect on CPU tasks.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program measuring.

    Returns
    -------
    res : List[MeasureResult]
        The measure results of these MeasureInputs.
    """
    assert len(inputs) == len(build_results), "Measure input size should be equal to build results"

    pool = PopenPoolExecutor(n_parallel)
    worker_args = [
        (
            inp.serialize(),
            build_res,
            prepare_runner_args(inp, build_res),
            key,
            host,
            port,
            priority,
            timeout,
            number,
            repeat,
            min_repeat_ms,
            cooldown_interval,
            enable_cpu_cache_flush,
            verbose,
        )
        for inp, build_res in zip(inputs, build_results)
    ]
    outcomes = pool.map_with_error_catching(_rpc_run_worker, worker_args)

    results = []
    for idx, outcome in enumerate(outcomes):
        if outcome.status == StatusKind.COMPLETE:
            results.append(MeasureResult(*outcome.value))
            continue

        # Anything that is not COMPLETE is expected to be a run timeout.
        assert outcome.status == StatusKind.TIMEOUT
        if verbose >= 1:
            print("*T", end="")
        timed_out_build = build_results[idx]
        results.append(
            MeasureResult(
                (MAX_FLOAT,),
                MeasureErrorNo.RUN_TIMEOUT,
                None,
                timed_out_build.time_cost + timeout,
                time.time(),
            )
        )

    if verbose >= 1:
        print("")

    return results
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Optional[str, Callable]
        The function name to create the session or the function itself.
    f_upload_module: Optional[str, Callable]
        The function name to upload the module or the function itself.
    f_alloc_argument: Optional[str, Callable]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Optional[str, Callable]
        The function name to run the evaluator or the function itself.
    f_cleanup: Optional[str, Callable]
        The function name to cleanup the session or the function itself.
    pool: PopenPoolExecutor
        The popen pool executor.

    Attributes
    ----------
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: Optional[int] = None,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: Optional[int] = None
            The maximum number of connections. Defaults to number of logical CPU cores.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        if max_workers is None:
            max_workers = cpu_count(logical=True)
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Use the normalized config, as `run` does: the raw `rpc_config`
            # argument defaults to None, which has no `session_timeout_sec`.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit one worker job per runner input and return their futures.

        Each job is wrapped in an `RPCRunnerFuture` whose timeout is the
        session timeout of the rpc configuration.
        """
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    _worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json() for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)  # type: ignore
        return results

    def _sanity_check(self) -> None:
        """Look up every customization function on a pool worker.

        Waiting on the submitted job re-raises whatever the lookup raised.
        """

        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session, default=None)
            get_global_func_with_default_on_worker(name=f_upload_module, default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
            get_global_func_with_default_on_worker(name=f_cleanup, default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()
class LocalBuilder(Builder):
    """Run compilation on local machine

    Parameters
    ----------
    timeout: float
        The timeout of a compilation
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    build_kwargs: dict
        If supplied, additional kwargs passed to build_func. Overrides any build_kwargs supplied
        by the Runner.
    build_func: callable or str
        If is 'default', use default build function
        If is 'ndk', use function for android ndk
        If is 'stackvm', use function for stackvm
        If is callable, use it as custom build function, expect lib_format field.
    do_fork: bool
        If False, do not fork when building. Requires n_parallel to be None or 1.
    runtime: Optional[Runtime]
        Specify the runtime to generate artifacts for
    """

    def __init__(
        self,
        timeout=10,
        n_parallel=None,
        build_kwargs=None,
        build_func="default",
        do_fork=False,
        runtime=None,
    ):
        super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

        if isinstance(build_func, str):
            if build_func == "default":
                build_func = tar.tar
            elif build_func == "ndk":
                build_func = ndk.create_shared
            elif build_func == "stackvm":
                build_func = stackvm.build
            else:
                # Keep a separator so the offending name is readable in the message.
                raise ValueError("Invalid build_func: " + build_func)
        self.build_func = _WrappedBuildFunc(build_func, runtime)
        if not do_fork:
            assert n_parallel in (
                None,
                1,
            ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"
        self.executor = PopenPoolExecutor(
            timeout=timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current,),
        )
        self.tmp_dir = tempfile.mkdtemp()

    def build(self, measure_inputs):
        """Build the measure inputs in batches of `self.n_parallel`.

        The temporary directory is recreated on every call. A build result that
        carries an error, a timeout, or a crashed worker process is turned into
        a `MeasureResult` with the matching error number; any other result is
        returned unchanged.

        Returns
        -------
        results: list
            One entry per measure input, in input order.
        """
        results = []

        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        self.tmp_dir = tempfile.mkdtemp()

        for i in range(0, len(measure_inputs), self.n_parallel):
            futures = []
            for inp in measure_inputs[i : i + self.n_parallel]:
                ret = self.executor.submit(self.build_func, inp, self.tmp_dir, **self.build_kwargs)
                futures.append(ret)

            for future in futures:
                try:
                    res = future.result()
                    if res.error is not None:
                        if isinstance(res.error, InstantiationError):
                            # instantiation error
                            res = MeasureResult(
                                (res.error,),
                                MeasureErrorNo.INSTANTIATION_ERROR,
                                res.time_cost,
                                time.time(),
                            )
                        elif "InstantiationError" in str(res.error):
                            # The error only mentions InstantiationError in its
                            # text; try to pull the message out of the
                            # second-to-last line and fall back to the full text.
                            msg = str(res.error)
                            try:
                                msg = msg.split("\n")[-2].split(": ")[1]
                            except Exception:  # pylint: disable=broad-except
                                pass
                            res = MeasureResult(
                                (InstantiationError(msg),),
                                MeasureErrorNo.INSTANTIATION_ERROR,
                                res.time_cost,
                                time.time(),
                            )
                        else:
                            # tvm error
                            res = MeasureResult(
                                (res.error,),
                                MeasureErrorNo.COMPILE_HOST,
                                res.time_cost,
                                time.time(),
                            )
                except TimeoutError as ex:
                    res = MeasureResult(
                        (ex,), MeasureErrorNo.BUILD_TIMEOUT, self.timeout, time.time()
                    )
                except ChildProcessError as ex:
                    res = MeasureResult(
                        (ex,),
                        MeasureErrorNo.RUNTIME_DEVICE,
                        self.timeout,
                        time.time(),
                    )

                results.append(res)

        return results
class RPCRunner(Runner):
    """Run generated code on remote devices.

    This runner asks an RPC Tracker for a device to measure on.

    Parameters
    ----------
    timeout: float
        The timeout of a RPCRunner measurement task
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    key: str
        The key of the device registered in the tracker
    host: str
        The host address of RPC Tracker
    port: int
        The port of RPC Tracker
    priority: int, optional
        The priority passed along with every remote request.
    number: int
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int, optional
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms: int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.
    cooldown_interval: float, optional
        The cool down interval between two measurements.
    enable_cpu_cache_flush: bool
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has an effect on CPU tasks.
    module_loader : ModuleLoader
        If given, a context manager that loads the module to be timed into the remote runtime.
        If not given, default_module_loader is used.
    """

    def __init__(
        self,
        key,
        host,
        port,
        priority=1,
        timeout=10,
        n_parallel=None,
        number=4,
        repeat=3,
        min_repeat_ms=0,
        cooldown_interval=0.1,
        enable_cpu_cache_flush=False,
        module_loader=None,
    ):
        super(RPCRunner, self).__init__(timeout, n_parallel)

        # Where to request the device from.
        self.key, self.host, self.port = key, host, port
        self.priority = priority
        self.timeout = timeout

        # How to time the generated code.
        self.number, self.repeat = number, repeat
        self.min_repeat_ms = min_repeat_ms
        self._ref_input = None

        self.enable_cpu_cache_flush = enable_cpu_cache_flush
        self.cooldown_interval = cooldown_interval
        self.module_loader = module_loader

        pool_timeout = timeout * (self.n_parallel + 1)
        self.executor = PopenPoolExecutor(
            timeout=pool_timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current,),
        )

    @property
    def ref_input(self):
        """
        Fixed input for tuning special operators, e.g., sparse operators
        requiring indices as input.
        """
        return self._ref_input

    @ref_input.setter
    def ref_input(self, val):
        # Setting a real value (anything but None) comes with a warning.
        if val is None:
            self._ref_input = val
            return
        warnings.warn(
            "You are specifying fixed input for tuning the operator. "
            "Be sure your input always fits the operator. Some "
            "operators may conduct layout transformation during tuning, "
            "thus can lead to unexpected behaviors. ",
            RuntimeWarning,
        )
        self._ref_input = val

    def set_task(self, task):
        """Remember the task and verify that the tracker can hand out a device for it."""
        self.task = task

        if not check_remote(task.target, self.key, self.host, self.port):
            raise RuntimeError(
                "Cannot get remote devices from the tracker. "
                "Please check the status of tracker by "
                "'python -m tvm.exec.query_rpc_tracker --port [THE PORT YOU USE]' "
                "and make sure you have free devices on the queue status."
            )
        logger.info("Get devices for measurement successfully!")

    def get_build_kwargs(self):
        """Return extra build kwargs; GPU targets get a "check_gpu" entry with device limits."""
        gpu_keys = ("cuda", "opencl", "rocm", "vulkan")
        if not any(gpu_key in self.task.target.keys for gpu_key in gpu_keys):
            return {}

        # Query the limits from a device obtained through the tracker.
        remote = request_remote(self.key, self.host, self.port)
        dev = remote.device(str(self.task.target), 0)
        max_dims = dev.max_thread_dimensions
        return {
            "check_gpu": {
                "max_shared_memory_per_block": dev.max_shared_memory_per_block,
                "max_threads_per_block": dev.max_threads_per_block,
                "max_thread_x": max_dims[0],
                "max_thread_y": max_dims[1],
                "max_thread_z": max_dims[2],
            }
        }

    def run(self, measure_inputs, build_results):
        """Measure the built inputs in batches of `self.n_parallel`.

        Any exception raised while waiting for a result is recorded as a
        `MeasureResult` with `MeasureErrorNo.RUN_TIMEOUT`.

        Returns
        -------
        results: list
            One entry per (measure input, build result) pair, in input order.
        """
        remote_kwargs = {
            "device_key": self.key,
            "host": self.host,
            "port": self.port,
            "priority": self.priority,
            "timeout": self.timeout,
        }

        results = []
        for start in range(0, len(measure_inputs), self.n_parallel):
            stop = start + self.n_parallel

            pending = []
            for measure_inp, build_res in zip(
                measure_inputs[start:stop], build_results[start:stop]
            ):
                if self.module_loader is not None:
                    module_loader = self.module_loader
                else:
                    module_loader = default_module_loader()
                pending.append(
                    self.executor.submit(
                        run_through_rpc,
                        measure_inp,
                        build_res,
                        self.number,
                        self.repeat,
                        self.min_repeat_ms,
                        self.cooldown_interval,
                        remote_kwargs,
                        self.ref_input,
                        self.enable_cpu_cache_flush,
                        module_loader,
                    )
                )

            for future in pending:
                try:
                    outcome = future.result()
                except Exception as ex:  # pylint: disable=broad-except
                    outcome = MeasureResult(
                        (str(ex),), MeasureErrorNo.RUN_TIMEOUT, self.timeout, time.time()
                    )
                results.append(outcome)

        return results
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Optional[str, Callable]
        The function name to create the session or the function itself.
    f_upload_module: Optional[str, Callable]
        The function name to upload the module or the function itself.
    f_alloc_argument: Optional[str, Callable]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Optional[str, Callable]
        The function name to run the evaluator or the function itself.
    f_cleanup: Optional[str, Callable]
        The function name to cleanup the session or the function itself.
    pool: PopenPoolExecutor
        The popen pool executor.

    Attributes
    ----------
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    T_CREATE_SESSION = Callable[
        [RPCConfig],  # The RPC configuration
        RPCSession,  # The RPC Session
    ]
    T_UPLOAD_MODULE = Callable[
        [
            RPCSession,  # The RPC Session
            str,  # local path to the artifact
            str,  # remote path to the artifact
        ],
        Module,  # the Module opened on the remote
    ]
    T_ALLOC_ARGUMENT = Callable[
        [
            RPCSession,  # The RPC Session
            Device,  # The device on the remote
            T_ARG_INFO_JSON_OBJ_LIST,  # The metadata information of the arguments to be allocated
            int,  # The number of repeated allocations to be done
        ],
        List[T_ARGUMENT_LIST],  # A list of argument lists
    ]
    T_RUN_EVALUATOR = Callable[
        [
            RPCSession,  # The RPC Session
            Module,  # The Module opened on the remote
            Device,  # The device on the remote
            EvaluatorConfig,  # The evaluator configuration
            List[T_ARGUMENT_LIST],  # A list of argument lists
        ],
        List[float],  # A list of running time
    ]
    T_CLEANUP = Callable[
        [
            Optional[RPCSession],  # The RPC Session to be cleaned up
            Optional[str],  # remote path to the artifact
        ],
        None,
    ]

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: int = 1,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: int = 1
            The maximum number of connections. Defaults to 1.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup

        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Use the normalized config, as `run` does: the raw `rpc_config`
            # argument defaults to None, which has no `session_timeout_sec`.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit one worker job per runner input and return their futures.

        Each job is wrapped in an `RPCRunnerFuture` whose timeout is the
        session timeout of the rpc configuration.
        """
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    RPCRunner._worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json() for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)
        return results

    def _sanity_check(self) -> None:
        """Look up every customization function on a pool worker.

        Waiting on the submitted job re-raises whatever the lookup raised.
        """

        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session, default=None)
            get_global_func_with_default_on_worker(name=f_upload_module, default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
            get_global_func_with_default_on_worker(name=f_cleanup, default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()

    @staticmethod
    def _worker_func(
        _f_create_session: Union[T_CREATE_SESSION, str, None],
        _f_upload_module: Union[T_UPLOAD_MODULE, str, None],
        _f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None],
        _f_run_evaluator: Union[T_RUN_EVALUATOR, str, None],
        _f_cleanup: Union[T_CLEANUP, str, None],
        rpc_config: RPCConfig,
        evaluator_config: EvaluatorConfig,
        alloc_repeat: int,
        artifact_path: str,
        device_type: str,
        args_info: T_ARG_INFO_JSON_OBJ_LIST,
    ) -> List[float]:
        """Run one measurement: create a session, upload, allocate, evaluate, clean up.

        Each `_f_*` argument is resolved through
        `get_global_func_with_default_on_worker`, with the matching `default_*`
        function as the fallback. The cleanup function is always called, also
        when one of the steps raises.

        Returns
        -------
        costs: List[float]
            The values returned by the evaluator function.
        """
        # Step 0. Get the registered functions
        f_create_session: RPCRunner.T_CREATE_SESSION = get_global_func_with_default_on_worker(
            _f_create_session, default_create_session
        )
        f_upload_module: RPCRunner.T_UPLOAD_MODULE = get_global_func_with_default_on_worker(
            _f_upload_module, default_upload_module
        )
        f_alloc_argument: RPCRunner.T_ALLOC_ARGUMENT = get_global_func_with_default_on_worker(
            _f_alloc_argument, default_alloc_argument
        )
        f_run_evaluator: RPCRunner.T_RUN_EVALUATOR = get_global_func_with_default_on_worker(
            _f_run_evaluator, default_run_evaluator
        )
        f_cleanup: RPCRunner.T_CLEANUP = get_global_func_with_default_on_worker(
            _f_cleanup, default_cleanup
        )
        # Managed resources
        session: Optional[RPCSession] = None
        remote_path: Optional[str] = None

        @contextmanager
        def resource_handler():
            try:
                yield
            finally:
                # Final step. Always clean up
                # `session` and `remote_path` are read when cleanup runs, so
                # whatever was assigned before a failure is what gets passed.
                f_cleanup(session, remote_path)

        with resource_handler():
            # Step 1. Create session
            session = f_create_session(rpc_config)
            device = session.device(dev_type=device_type, dev_id=0)
            # Step 2. Upload the module
            _, remote_path = osp.split(artifact_path)
            local_path: str = artifact_path
            rt_mod: Module = f_upload_module(session, local_path, remote_path)
            # Step 3: Allocate input arguments
            repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument(
                session,
                device,
                args_info,
                alloc_repeat,
            )
            # Step 4: Run time_evaluator
            costs: List[float] = f_run_evaluator(
                session,
                rt_mod,
                device,
                evaluator_config,
                repeated_args,
            )
        return costs