Example #1
0
    def __init__(
        self,
        timeout=10,
        n_parallel=None,
        build_kwargs=None,
        build_func="default",
        do_fork=False,
        runtime=None,
    ):
        """Set up a builder that compiles measurement inputs on this machine.

        Parameters
        ----------
        timeout:
            Forwarded to the base class and used as the timeout of the
            popen pool executor.
        n_parallel:
            Forwarded to the base class. Must be None or 1 when ``do_fork``
            is False.
        build_kwargs:
            Forwarded to the base class.
        build_func: str or callable
            One of the names "default", "ndk" or "stackvm", or a callable
            that is used directly as the build function.
        do_fork: bool
            When False, ``n_parallel`` is asserted to be None or 1.
        runtime:
            Handed to ``_WrappedBuildFunc`` together with the build function.

        Raises
        ------
        ValueError
            If ``build_func`` is a string other than the three known names.
        """
        super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

        if isinstance(build_func, str):
            if build_func == "default":
                build_func = tar.tar
            elif build_func == "ndk":
                build_func = ndk.create_shared
            elif build_func == "stackvm":
                build_func = stackvm.build
            else:
                # Keep a separator so the offending name is readable in the message.
                raise ValueError(f"Invalid build_func: {build_func!r}")
        self.build_func = _WrappedBuildFunc(build_func, runtime)
        if not do_fork:
            assert n_parallel in (
                None,
                1,
            ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"
        self.executor = PopenPoolExecutor(
            timeout=timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ))
        self.tmp_dir = tempfile.mkdtemp()
Example #2
0
    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: Optional[int] = None,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: Optional[RPCConfig]
            The rpc configuration. It is passed through
            ``RPCConfig._normalized`` before being stored (presumably mapping
            ``None`` to a default configuration -- confirm against RPCConfig).
        evaluator_config: Optional[EvaluatorConfig]
            The evaluator configuration, stored after
            ``EvaluatorConfig._normalized``.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: Optional[int] = None
            The maximum number of connections. Defaults to number of logical CPU cores.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        if max_workers is None:
            max_workers = cpu_count(logical=True)
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Use the normalized config: the raw `rpc_config` argument defaults
            # to None, which has no `session_timeout_sec` attribute.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()
Example #3
0
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
    """
    Compile a batch of MeasureInputs in a popen worker pool.

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be built.
    timeout : int
        The timeout limit (in second) for each build; it is handed to the
        pool executor and recorded as the time cost of failed builds.
    n_parallel : int
        Number of pool workers used to build in parallel.
    build_func : str = 'default'
        The name of build function to process the built module. It must
        equal ``BuildFunc.name``.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program building.

    Returns
    -------
    res : List[BuildResult]
        One BuildResult per MeasureInput, in input order.

    Raises
    ------
    ValueError
        If a worker reports a status other than COMPLETE, TIMEOUT or EXCEPTION.
    """
    assert build_func == BuildFunc.name, (
        "BuildFunc.name: " + BuildFunc.name + ", but args is: " + build_func
    )
    pool = PopenPoolExecutor(
        n_parallel, timeout, reset_global_scope, (AutotvmGlobalScope.current,)
    )
    # One argument tuple per input; the worker receives the serialized input.
    worker_args = [(inp.serialize(), BuildFunc.build_func, verbose) for inp in inputs]
    outcomes = pool.map_with_error_catching(local_build_worker, worker_args)

    results = []
    for outcome in outcomes:
        status = outcome.status
        if status == StatusKind.COMPLETE:
            results.append(BuildResult(*outcome.value))
            continue
        if status == StatusKind.TIMEOUT:
            if verbose >= 1:
                print(".T", end="", flush=True)  # Build timeout
            results.append(BuildResult(None, [], MeasureErrorNo.BUILD_TIMEOUT, None, timeout))
            continue
        if status == StatusKind.EXCEPTION:
            if verbose >= 1:
                print(".E", end="", flush=True)  # Build error
            failure = BuildResult(
                None, [], MeasureErrorNo.COMPILE_HOST, repr(outcome.value), timeout
            )
            results.append(failure)
            continue
        raise ValueError("Result status is not expected. Unreachable branch")

    return results
Example #4
0
def test_popen_pool_executor_recycles():
    """A single worker limited to two uses serves two jobs, then a new pid appears."""
    executor = PopenPoolExecutor(max_workers=1, timeout=None, maximum_process_uses=2)

    first_pid = executor.submit(os.getpid).result()

    # Second job: expected to report the same pid as the first.
    second_pid = executor.submit(os.getpid).result()
    assert first_pid == second_pid

    # Third job: expected to report a different pid.
    third_pid = executor.submit(os.getpid).result()
    assert first_pid != third_pid
def test_popen_pool_executor():
    """Exercise timeout, crash, normal results and mapping on the pool."""
    import tvm

    executor = PopenPoolExecutor(max_workers=2, timeout=0.01)
    slow_job = executor.submit(identity_after, 1, 100)
    crashing_job = executor.submit(terminate_self)
    quick_job = executor.submit(identity_after, 3, 0)
    string_job = executor.submit(tvm.runtime.String, "xyz")

    # The first job is expected to exceed the 0.01 timeout.
    with pytest.raises(TimeoutError):
        slow_job.result()

    # The second job is expected to surface as a child-process failure.
    with pytest.raises(ChildProcessError):
        crashing_job.result()

    assert quick_job.result() == 3
    returned = string_job.result()
    assert isinstance(returned, tvm.runtime.String)
    assert returned == "xyz"

    # Rebind the same name so the first pool is released exactly as before.
    executor = PopenPoolExecutor(max_workers=4, timeout=None)
    mapped = executor.map_with_error_catching(lambda x: x, range(100))

    for expected, item in enumerate(mapped):
        assert item.value == expected
def test_popen_pool_executor_timeout():
    """Any exception raised by the submitted job must be a TimeoutError.

    A job that returns normally also passes; only a non-timeout exception
    fails the test.
    """
    limit = 0.5

    executor = PopenPoolExecutor(timeout=limit)
    job = executor.submit(timeout_job, limit)

    # Spin until the future reports completion.
    while True:
        if job.done():
            break

    try:
        job.result()
    except Exception as err:
        assert isinstance(err, TimeoutError)
def test_popen_pool_executor_async():
    """The fast job must be observed finished before the slow one, with equal results."""
    executor = PopenPoolExecutor()
    slow_job = executor.submit(slow_summation, 9999999)
    fast_job = executor.submit(fast_summation, 9999999)

    # Zero means "not finished yet"; otherwise the wall-clock time first seen done.
    slow_done_at = 0
    fast_done_at = 0
    while slow_done_at == 0 or fast_done_at == 0:
        if slow_done_at == 0 and slow_job.done():
            slow_done_at = time.time()
        if fast_done_at == 0 and fast_job.done():
            fast_done_at = time.time()

    assert fast_done_at < slow_done_at, "Expected fast async job to finish first!"
    assert slow_job.result() == fast_job.result()
Example #8
0
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
    """
    Compile a batch of MeasureInputs in a popen worker pool.

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be built.
    timeout : int
        The timeout limit (in second) for each build; it is given to the pool
        executor, forwarded to every worker call, and recorded as the time
        cost of timed-out builds.
    n_parallel : int
        Number of pool workers used to build in parallel.
    build_func : str = 'default'
        The name of build function to process the built module.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program building.

    Returns
    -------
    res : List[BuildResult]
        One BuildResult per MeasureInput, in input order.
    """
    pool = PopenPoolExecutor(n_parallel, timeout)
    # One argument tuple per input; the worker receives the serialized input.
    worker_args = [(inp.serialize(), build_func, timeout, verbose) for inp in inputs]
    outcomes = pool.map_with_error_catching(local_build_worker, worker_args)

    results = []
    for outcome in outcomes:
        if outcome.status == StatusKind.COMPLETE:
            results.append(BuildResult(*outcome.value))
            continue

        # Anything that did not complete is expected to be a timeout.
        assert outcome.status == StatusKind.TIMEOUT
        if verbose >= 1:
            print(".T", end="", flush=True)  # Build timeout
        timed_out = BuildResult(None, [], MeasureErrorNo.BUILD_TIMEOUT, None, timeout)
        results.append(timed_out)

    return results
Example #9
0
    def _reset_pool(self, space, target, task):
        """reset processing pool for feature extraction"""

        if self.upper_model:  # base model will reuse upper model's pool,
            self.upper_model._reset_pool(space, target, task)
            return

        self._close_pool()

        self.pool = PopenPoolExecutor(
            max_workers=self.num_threads,
            initializer=_extract_popen_initializer,
            initargs=(space, target, task),
        )
Example #10
0
    def __init__(
        self,
        key,
        host,
        port,
        priority=1,
        timeout=10,
        n_parallel=None,
        number=4,
        repeat=3,
        min_repeat_ms=0,
        cooldown_interval=0.1,
        enable_cpu_cache_flush=False,
        module_loader=None,
    ):
        """Store the measurement settings and create the popen pool executor.

        ``timeout`` and ``n_parallel`` are forwarded to the base class; every
        other argument is stored on the instance under the same name. The
        executor timeout is ``timeout * (self.n_parallel + 1)``, using the
        ``n_parallel`` attribute as left by the base-class initializer.
        """
        super(RPCRunner, self).__init__(timeout, n_parallel)

        # Where and how to request the device.
        self.key, self.host, self.port = key, host, port
        self.priority = priority
        self.timeout = timeout

        # Timing parameters of a measurement.
        self.number = number
        self.repeat = repeat
        self.min_repeat_ms = min_repeat_ms
        self._ref_input = None

        self.enable_cpu_cache_flush = enable_cpu_cache_flush
        self.cooldown_interval = cooldown_interval
        self.module_loader = module_loader

        executor_timeout = timeout * (self.n_parallel + 1)
        self.executor = PopenPoolExecutor(
            timeout=executor_timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ),
        )
Example #11
0
def pool_map(func, args, batch_size, verbose=False, pool=None):
    """Map ``func`` over ``args`` in small batches and stack the results.

    Mapping a large argument list one batch at a time can reduce memory usage.

    Parameters
    ----------
    func: Func(arg) -> np.ndarray
        mapping function
    args: List
        list of arguments
    batch_size: int
        batch size in mapping
    verbose: bool, optional
        whether print progress
    pool: optional
        pool object providing ``map(func, iterable)``. When omitted, a
        ``PopenPoolExecutor`` is created for this call and closed afterwards,
        even if the mapping raises.

    Returns
    -------
    converted numpy array, or None when ``args`` is empty
    """
    tic = time.time()
    owns_pool = not pool
    local_pool = pool or PopenPoolExecutor()
    chunks = []
    try:
        if verbose:
            logger.info("mapping begin")
        for i in range(0, len(args), batch_size):
            if verbose:
                logger.info("mapping %d/%d elapsed %.2f", i, len(args),
                            time.time() - tic)
            chunks.append(np.array(local_pool.map(func, args[i:i + batch_size])))
        if verbose:
            logger.info("mapping done")
    finally:
        # Only close a pool created here; a caller-supplied pool stays open.
        if owns_pool:
            local_pool.close()

    if not chunks:
        return None
    if len(chunks) == 1:
        return chunks[0]
    # Join once at the end instead of re-copying the accumulated result per batch.
    return np.concatenate(chunks)
Example #12
0
def rpc_runner_run(
    inputs,
    build_results,
    key,
    host,
    port,
    priority=1,
    n_parallel=1,
    timeout=10,
    number=3,
    repeat=1,
    min_repeat_ms=0,
    cooldown_interval=0.0,
    enable_cpu_cache_flush=False,
    verbose=1,
):
    """Measure the performance of built programs through an RPC tracker.

    Parameters
    ----------
    inputs : List[MeasureInput]
        The MeasureInputs to be measured.
    build_results : List[BuildResult]
        The BuildResults to be measured; must have the same length as `inputs`.
    key : str
        The key of the device registered in the RPC tracker.
    host : str
        The host address of the RPC Tracker.
    port : int
        The port of RPC Tracker.
    priority : int = 1
        The priority of this run request, larger is more prior.
    n_parallel : int = 1
        The number of tasks run in parallel.
    timeout : int = 10
        The timeout limit (in second) for each run. It is forwarded to every
        worker call and added to the build time cost of timed-out runs.
    number : int = 3
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int = 1
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms : int = 0
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.
    cooldown_interval : float = 0.0
        The cool down interval between two measurements in seconds.
    enable_cpu_cache_flush: bool = False
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has effect on CPU task.
    verbose: int = 1
        Verbosity level. 0 for silent, 1 to output information during program measuring.

    Returns
    -------
    res : List[MeasureResult]
        The measure results of these MeasureInputs.
    """
    assert len(inputs) == len(
        build_results), "Measure input size should be equal to build results"

    # One pool worker slot per parallel task.
    pool = PopenPoolExecutor(n_parallel)

    jobs = []
    for inp, build_res in zip(inputs, build_results):
        jobs.append((
            inp.serialize(),
            build_res,
            prepare_runner_args(inp, build_res),
            key,
            host,
            port,
            priority,
            timeout,
            number,
            repeat,
            min_repeat_ms,
            cooldown_interval,
            enable_cpu_cache_flush,
            verbose,
        ))
    outcomes = pool.map_with_error_catching(_rpc_run_worker, jobs)

    results = []
    for idx, outcome in enumerate(outcomes):
        if outcome.status == StatusKind.COMPLETE:
            results.append(MeasureResult(*outcome.value))
            continue

        # Anything that did not complete is expected to be a timeout.
        assert outcome.status == StatusKind.TIMEOUT
        if verbose >= 1:
            print("*T", end="")  # Run timeout
        failed_build = build_results[idx]
        timed_out = MeasureResult(
            (MAX_FLOAT, ),
            MeasureErrorNo.RUN_TIMEOUT,
            None,
            failed_build.time_cost + timeout,
            time.time(),
        )
        results.append(timed_out)

    if verbose >= 1:
        print("")

    return results
Example #13
0
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.
    pool: PopenPoolExecutor
        The popen pool executor.

    Attributes
    ----------
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

            def default_create_session(rpc_config: RPCConfig) -> RPCSession:
                ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

            def default_upload_module(
                session: RPCSession,
                local_path: str,
                remote_path: str,
            ) -> Module:
                ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

            def default_alloc_argument(
                session: RPCSession,
                device: Device,
                args_info: T_ARG_INFO_JSON_OBJ_LIST,
                alloc_repeat: int,
            ) -> List[T_ARGUMENT_LIST]:
                ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

            def default_run_evaluator(
                session: RPCSession,
                rt_mod: Module,
                device: Device,
                evaluator_config: EvaluatorConfig,
                repeated_args: List[T_ARGUMENT_LIST],
            ) -> List[float]:
                ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

            def default_cleanup(
                session: Optional[RPCSession],
                remote_path: Optional[str],
            ) -> None:
                ...
    """

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: Optional[int] = None,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: Optional[RPCConfig]
            The rpc configuration. It is passed through
            ``RPCConfig._normalized`` before being stored (presumably mapping
            ``None`` to a default configuration -- confirm against RPCConfig).
        evaluator_config: Optional[EvaluatorConfig]
            The evaluator configuration, stored after
            ``EvaluatorConfig._normalized``.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: Optional[int] = None
            The maximum number of connections. Defaults to number of logical CPU cores.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        if max_workers is None:
            max_workers = cpu_count(logical=True)
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Use the normalized config, as `run` does: the raw `rpc_config`
            # argument defaults to None, which has no `session_timeout_sec`.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit one worker task per runner input and return their futures.

        Parameters
        ----------
        runner_inputs: List[RunnerInput]
            The inputs to run; each contributes its artifact path, device
            type and argument info to the submitted task.

        Returns
        -------
        results: List[RunnerFuture]
            One ``RPCRunnerFuture`` per input, in input order, each carrying
            the configured session timeout.
        """
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    _worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json()
                          for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)  # type: ignore
        return results

    def _sanity_check(self) -> None:
        """Look up the five customization functions inside a pool worker.

        The lookup is submitted to the pool and its result is awaited, so any
        failure raised by the lookup surfaces here, during construction.
        """
        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_upload_module,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_cleanup,
                                                   default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()
Example #14
0
class LocalBuilder(Builder):
    """Run compilation on local machine

    Parameters
    ----------
    timeout: float
        The timeout of a compilation
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    build_kwargs: dict
        If supplied, additional kwargs passed to build_func. Overrides any build_kwargs supplied
        by the Runner.
    build_func: callable or str
        If is 'default', use default build function
        If is 'ndk', use function for android ndk
        If is 'stackvm', use function for stackvm
        If is callable, use it as custom build function, expect lib_format field.
    do_fork: bool
        If False, do not fork when building. Requires n_parallel=1.
    runtime: Optional[Runtime]
        Specify the runtime to generate artifacts for
    """
    def __init__(
        self,
        timeout=10,
        n_parallel=None,
        build_kwargs=None,
        build_func="default",
        do_fork=False,
        runtime=None,
    ):
        """Resolve the build function and create the pool executor and temp dir.

        Raises
        ------
        ValueError
            If ``build_func`` is a string other than "default", "ndk" or
            "stackvm".
        """
        super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

        if isinstance(build_func, str):
            if build_func == "default":
                build_func = tar.tar
            elif build_func == "ndk":
                build_func = ndk.create_shared
            elif build_func == "stackvm":
                build_func = stackvm.build
            else:
                # Keep a separator so the offending name is readable in the message.
                raise ValueError(f"Invalid build_func: {build_func!r}")
        self.build_func = _WrappedBuildFunc(build_func, runtime)
        if not do_fork:
            assert n_parallel in (
                None,
                1,
            ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"
        self.executor = PopenPoolExecutor(
            timeout=timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ))
        self.tmp_dir = tempfile.mkdtemp()

    def build(self, measure_inputs):
        """Build every measure input and return one result per input.

        The temporary directory is removed and recreated at the start of each
        call. Inputs are submitted to the executor in groups of
        ``self.n_parallel``. A successful build result is returned unchanged;
        a build result carrying an error, a timeout, or a child-process
        failure is converted into a ``MeasureResult`` with the matching
        error number.
        """
        results = []

        # Start from an empty temp dir on every call.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        self.tmp_dir = tempfile.mkdtemp()

        for i in range(0, len(measure_inputs), self.n_parallel):
            futures = [
                self.executor.submit(self.build_func, inp, self.tmp_dir,
                                     **self.build_kwargs)
                for inp in measure_inputs[i:i + self.n_parallel]
            ]

            for future in futures:
                try:
                    res = future.result()
                    error = res.error
                    if error is None:
                        pass
                    elif isinstance(error, InstantiationError):
                        # instantiation error
                        res = MeasureResult(
                            (error, ),
                            MeasureErrorNo.INSTANTIATION_ERROR,
                            res.time_cost,
                            time.time(),
                        )
                    elif "InstantiationError" in str(error):
                        # The error only mentions InstantiationError in its
                        # text; try to pull the message out of the second-last
                        # line, falling back to the full text.
                        msg = str(error)
                        try:
                            msg = msg.split("\n")[-2].split(": ")[1]
                        except IndexError:
                            pass
                        res = MeasureResult(
                            (InstantiationError(msg), ),
                            MeasureErrorNo.INSTANTIATION_ERROR,
                            res.time_cost,
                            time.time(),
                        )
                    else:  # tvm error
                        res = MeasureResult(
                            (error, ),
                            MeasureErrorNo.COMPILE_HOST,
                            res.time_cost,
                            time.time(),
                        )
                except TimeoutError as ex:
                    res = MeasureResult((ex, ), MeasureErrorNo.BUILD_TIMEOUT,
                                        self.timeout, time.time())
                except ChildProcessError as ex:
                    res = MeasureResult(
                        (ex, ),
                        MeasureErrorNo.RUNTIME_DEVICE,
                        self.timeout,
                        time.time(),
                    )

                results.append(res)

        return results
Example #15
0
class RPCRunner(Runner):
    """Run generated code on remote devices.
    This runner asks a RPC Tracker for a device to measure on.

    Parameters
    ----------
    timeout: float
        The timeout of a RPCRunner measurement task
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    key: str
        The key of the device registered in the tracker
    host: str
        The host address of RPC Tracker
    port: int
        The port of RPC Tracker
    priority: int, optional
        The priority passed along with the remote request.
    number: int
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int, optional
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms: int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.
    cooldown_interval: float, optional
        The cool down interval between two measurements.
    enable_cpu_cache_flush: bool
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has effect on CPU task.
    module_loader : ModuleLoader
        If given, a context manager that loads the module to be timed into the remote runtime.
        If not given, default_module_loader is used.
    """
    def __init__(
        self,
        key,
        host,
        port,
        priority=1,
        timeout=10,
        n_parallel=None,
        number=4,
        repeat=3,
        min_repeat_ms=0,
        cooldown_interval=0.1,
        enable_cpu_cache_flush=False,
        module_loader=None,
    ):
        """Store the measurement settings and create the popen pool executor."""
        super(RPCRunner, self).__init__(timeout, n_parallel)

        # Where and how to request the device.
        self.key, self.host, self.port = key, host, port
        self.priority = priority
        self.timeout = timeout

        # Timing parameters of a measurement.
        self.number = number
        self.repeat = repeat
        self.min_repeat_ms = min_repeat_ms
        self._ref_input = None

        self.enable_cpu_cache_flush = enable_cpu_cache_flush
        self.cooldown_interval = cooldown_interval
        self.module_loader = module_loader

        # `self.n_parallel` is the value left by the base-class initializer.
        executor_timeout = timeout * (self.n_parallel + 1)
        self.executor = PopenPoolExecutor(
            timeout=executor_timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ),
        )

    @property
    def ref_input(self):
        """
        Fixed input for tuning special operators, e.g., sparse operators
        requiring indices as input.
        """
        return self._ref_input

    @ref_input.setter
    def ref_input(self, val):
        # Setting a non-None fixed input is allowed but flagged with a warning.
        if val is not None:
            message = (
                "You are specifying fixed input for tuning the operator. "
                "Be sure your input always fits the operator. Some "
                "operators may conduct layout transformation during tuning, "
                "thus can lead to unexpected behaviors. "
            )
            warnings.warn(message, RuntimeWarning)
        self._ref_input = val

    def set_task(self, task):
        """Remember the task and verify that the tracker can provide a device.

        Raises
        ------
        RuntimeError
            If ``check_remote`` reports failure for the task's target.
        """
        self.task = task

        if not check_remote(task.target, self.key, self.host, self.port):
            raise RuntimeError(
                "Cannot get remote devices from the tracker. "
                "Please check the status of tracker by "
                "'python -m tvm.exec.query_rpc_tracker --port [THE PORT YOU USE]' "
                "and make sure you have free devices on the queue status.")
        logger.info("Get devices for measurement successfully!")

    def get_build_kwargs(self):
        """Return extra build kwargs derived from the remote device.

        For targets whose keys contain "cuda", "opencl", "rocm" or "vulkan",
        a remote device is requested and its limits are returned under
        ``"check_gpu"``; otherwise an empty dict is returned.
        """
        kwargs = {}
        gpu_keys = ("cuda", "opencl", "rocm", "vulkan")
        if not any(name in self.task.target.keys for name in gpu_keys):
            return kwargs

        remote = request_remote(self.key, self.host, self.port)
        dev = remote.device(str(self.task.target), 0)
        max_dims = dev.max_thread_dimensions
        kwargs["check_gpu"] = {
            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
            "max_threads_per_block": dev.max_threads_per_block,
            "max_thread_x": max_dims[0],
            "max_thread_y": max_dims[1],
            "max_thread_z": max_dims[2],
        }
        return kwargs

    def run(self, measure_inputs, build_results):
        """Measure every (input, build result) pair and return the results.

        Pairs are submitted to the executor in groups of ``self.n_parallel``.
        Any exception raised while collecting a result is recorded as a
        ``MeasureResult`` with ``MeasureErrorNo.RUN_TIMEOUT``.
        """
        results = []
        remote_kwargs = {
            "device_key": self.key,
            "host": self.host,
            "port": self.port,
            "priority": self.priority,
            "timeout": self.timeout,
        }

        for start in range(0, len(measure_inputs), self.n_parallel):
            stop = start + self.n_parallel
            pending = []
            for measure_inp, build_res in zip(measure_inputs[start:stop],
                                              build_results[start:stop]):
                # Fall back to a freshly created default loader per input.
                if self.module_loader is not None:
                    loader = self.module_loader
                else:
                    loader = default_module_loader()
                pending.append(
                    self.executor.submit(
                        run_through_rpc,
                        measure_inp,
                        build_res,
                        self.number,
                        self.repeat,
                        self.min_repeat_ms,
                        self.cooldown_interval,
                        remote_kwargs,
                        self.ref_input,
                        self.enable_cpu_cache_flush,
                        loader,
                    ))

            for fut in pending:
                try:
                    outcome = fut.result()
                except Exception as ex:  # pylint: disable=broad-except
                    outcome = MeasureResult((str(ex), ),
                                            MeasureErrorNo.RUN_TIMEOUT,
                                            self.timeout, time.time())
                results.append(outcome)

        return results
Example #16
0
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: Optional[RPCConfig]
        The rpc configuration.
    evaluator_config: Optional[EvaluatorConfig]
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.
    max_workers: int
        The maximum number of connections. Defaults to 1.
    initializer: Optional[Callable[[], None]]
        The initializer function.

    Attributes
    ----------
    pool: PopenPoolExecutor
        The popen pool executor.

    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

            def default_create_session(rpc_config: RPCConfig) -> RPCSession:
                ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

            def default_upload_module(
                session: RPCSession,
                local_path: str,
                remote_path: str,
            ) -> Module:
                ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

            def default_alloc_argument(
                session: RPCSession,
                device: Device,
                args_info: T_ARG_INFO_JSON_OBJ_LIST,
                alloc_repeat: int,
            ) -> List[T_ARGUMENT_LIST]:
                ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

            def default_run_evaluator(
                session: RPCSession,
                rt_mod: Module,
                device: Device,
                evaluator_config: EvaluatorConfig,
                repeated_args: List[T_ARGUMENT_LIST],
            ) -> List[float]:
                ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

            def default_cleanup(
                session: Optional[RPCSession],
                remote_path: Optional[str],
            ) -> None:
                ...
    """

    T_CREATE_SESSION = Callable[
        [RPCConfig],  # The RPC configuration
        RPCSession,  # The RPC Session
    ]
    T_UPLOAD_MODULE = Callable[
        [
            RPCSession,  # The RPC Session
            str,  # local path to the artifact
            str,  # remote path to the artifact
        ],
        Module,  # the Module opened on the remote
    ]
    T_ALLOC_ARGUMENT = Callable[
        [
            RPCSession,  # The RPC Session
            Device,  # The device on the remote
            T_ARG_INFO_JSON_OBJ_LIST,  # The metadata information of the arguments to be allocated
            int,  # The number of repeated allocations to be done
        ],
        List[T_ARGUMENT_LIST],  # A list of argument lists
    ]
    T_RUN_EVALUATOR = Callable[
        [
            RPCSession,  # The RPC Session
            Module,  # The Module opened on the remote
            Device,  # The device on the remote
            EvaluatorConfig,  # The evaluator configuration
            List[T_ARGUMENT_LIST],  # A list of argument lists
        ],
        List[float],  # A list of running time
    ]
    T_CLEANUP = Callable[
        [
            Optional[RPCSession],  # The RPC Session to be cleaned up
            Optional[str],  # remote path to the artifact
        ],
        None,
    ]

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: int = 1,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: Optional[RPCConfig]
            The rpc configuration. May be ``None`` (the default); the value is
            passed through ``RPCConfig._normalized`` before it is stored or read.
        evaluator_config: Optional[EvaluatorConfig]
            The evaluator configuration. May be ``None`` (the default); the value
            is passed through ``EvaluatorConfig._normalized`` before it is stored.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: int = 1
            The maximum number of connections. Defaults to 1.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Read the timeout from the normalized config: the raw `rpc_config`
            # argument defaults to None, which has no `session_timeout_sec`.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        # Fail fast if any of the configured function names cannot be resolved
        # on a worker.
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit each runner input to the worker pool.

        Parameters
        ----------
        runner_inputs: List[RunnerInput]
            The inputs to run; one job is submitted to ``self.pool`` per input.

        Returns
        -------
        results: List[RunnerFuture]
            One ``RPCRunnerFuture`` per input, in input order, each wrapping the
            pool future and carrying the configured session timeout.
        """

        def _submit(runner_input: RunnerInput) -> RunnerFuture:
            # Only plain values (strings and JSON-style tuples) derived from the
            # input are handed to the worker.
            pending = self.pool.submit(
                RPCRunner._worker_func,
                self.f_create_session,
                self.f_upload_module,
                self.f_alloc_argument,
                self.f_run_evaluator,
                self.f_cleanup,
                self.rpc_config,
                self.evaluator_config,
                self.alloc_repeat,
                str(runner_input.artifact_path),
                str(runner_input.device_type),
                tuple(info.as_json() for info in runner_input.args_info),
            )
            return RPCRunnerFuture(
                future=pending,
                timeout_sec=self.rpc_config.session_timeout_sec,
            )

        return [_submit(runner_input) for runner_input in runner_inputs]

    def _sanity_check(self) -> None:
        """Resolve all five configured functions once on a pool worker.

        A small job is submitted to ``self.pool`` that looks up each of
        ``f_create_session``, ``f_upload_module``, ``f_alloc_argument``,
        ``f_run_evaluator`` and ``f_cleanup`` via
        ``get_global_func_with_default_on_worker``. The call blocks on the job's
        result, so whatever the lookup raises surfaces here.
        """

        def _check(*func_names) -> None:
            # Same lookup for every entry; the return values are not needed.
            for func_name in func_names:
                get_global_func_with_default_on_worker(name=func_name, default=None)

        self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        ).result()

    @staticmethod
    def _worker_func(
        _f_create_session: Union[T_CREATE_SESSION, str, None],
        _f_upload_module: Union[T_UPLOAD_MODULE, str, None],
        _f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None],
        _f_run_evaluator: Union[T_RUN_EVALUATOR, str, None],
        _f_cleanup: Union[T_CLEANUP, str, None],
        rpc_config: RPCConfig,
        evaluator_config: EvaluatorConfig,
        alloc_repeat: int,
        artifact_path: str,
        device_type: str,
        args_info: T_ARG_INFO_JSON_OBJ_LIST,
    ) -> List[float]:
        """Run one measurement: create session, upload, allocate, evaluate, clean up.

        Parameters
        ----------
        _f_create_session, _f_upload_module, _f_alloc_argument, _f_run_evaluator, _f_cleanup
            The function names or the functions themselves; each is resolved
            through ``get_global_func_with_default_on_worker`` with the matching
            ``default_*`` function as the fallback argument.
        rpc_config: RPCConfig
            The rpc configuration handed to the session-creation function.
        evaluator_config: EvaluatorConfig
            The evaluator configuration handed to the evaluator function.
        alloc_repeat: int
            The repeat count handed to the argument-allocation function.
        artifact_path: str
            Local path of the artifact; its final path component is used as the
            remote path.
        device_type: str
            The device type used to obtain device 0 from the session.
        args_info: T_ARG_INFO_JSON_OBJ_LIST
            The argument metadata handed to the argument-allocation function.

        Returns
        -------
        costs: List[float]
            The value returned by the evaluator function.
        """
        # Step 0. Resolve the five pluggable functions
        resolve = get_global_func_with_default_on_worker
        create_session: RPCRunner.T_CREATE_SESSION = resolve(
            _f_create_session, default_create_session
        )
        upload_module: RPCRunner.T_UPLOAD_MODULE = resolve(
            _f_upload_module, default_upload_module
        )
        alloc_argument: RPCRunner.T_ALLOC_ARGUMENT = resolve(
            _f_alloc_argument, default_alloc_argument
        )
        run_evaluator: RPCRunner.T_RUN_EVALUATOR = resolve(
            _f_run_evaluator, default_run_evaluator
        )
        cleanup: RPCRunner.T_CLEANUP = resolve(_f_cleanup, default_cleanup)

        # Resources handed to cleanup; they stay None until actually established.
        session: Optional[RPCSession] = None
        remote_path: Optional[str] = None

        try:
            # Step 1. Create session
            session = create_session(rpc_config)
            device = session.device(dev_type=device_type, dev_id=0)
            # Step 2. Upload the module under the artifact's file name
            remote_path = osp.basename(artifact_path)
            rt_mod: Module = upload_module(session, artifact_path, remote_path)
            # Step 3. Allocate input arguments
            repeated_args: List[T_ARGUMENT_LIST] = alloc_argument(
                session, device, args_info, alloc_repeat
            )
            # Step 4. Run time_evaluator
            costs: List[float] = run_evaluator(
                session, rt_mod, device, evaluator_config, repeated_args
            )
        finally:
            # Final step. Always clean up, with whatever was established so far
            cleanup(session, remote_path)
        return costs