class LocalRPCMeasureContext: """ A context wrapper for running RPCRunner locally. This will launch a local RPC Tracker and local RPC Server. TODO(FrozenGene): Add cpu cache flush to this RPC context. Parameters ---------- priority : int = 1 The priority of this run request, larger is more prior. n_parallel : int = 1 The number of tasks run in parallel. timeout : int = 10 The timeout limit (in second) for each run. This is used in a wrapper of the multiprocessing.Process.join(). number : int = 3 The number of times to run the generated code for taking average. We call these runs as one `repeat` of measurement. repeat : int = 1 The number of times to repeat the measurement. In total, the generated code will be run (1 + number x repeat) times, where the first "1" is warm up and will be discarded. The returned result contains `repeat` costs, each of which is an average of `number` costs. min_repeat_ms : int = 0 The minimum duration of one `repeat` in milliseconds. By default, one `repeat` contains `number` runs. If this parameter is set, the parameters `number` will be dynamically adjusted to meet the minimum duration requirement of one `repeat`. i.e., When the run time of one `repeat` falls below this time, the `number` parameter will be automatically increased. cooldown_interval : float = 0.0 The cool down interval between two measurements. """ def __init__(self, priority=1, n_parallel=1, timeout=10, number=3, repeat=1, min_repeat_ms=0, cooldown_interval=0.0): ctx = tvm.context("cuda", 0) if ctx.exist: cuda_arch = "sm_" + "".join(ctx.compute_version.split('.')) set_cuda_target_arch(cuda_arch) host = '0.0.0.0' self.tracker = Tracker(host, port=9000, port_end=10000, silent=True) device_key = '$local$device$%d' % self.tracker.port self.server = Server(host, port=self.tracker.port, port_end=10000, key=device_key, use_popen=True, silent=True, tracker_addr=(self.tracker.host, self.tracker.port)) self.runner = RPCRunner(device_key, host, self.tracker.port, priority, n_parallel, timeout, number, repeat, min_repeat_ms, cooldown_interval) # Wait for the processes to start time.sleep(0.5) def __del__(self): # Close the tracker and server before exit self.tracker.terminate() self.server.terminate()
class LocalRPCMeasureContext: """A context wrapper for running RPCRunner locally. This will launch a local RPC Tracker and local RPC Server. Parameters ---------- priority : int = 1 The priority of this run request, larger is more prior. n_parallel : int = 1 The number of tasks run in parallel. timeout : int = 10 The timeout limit (in second) for each run. This is used in a wrapper of the multiprocessing.Process.join(). number : int = 3 The number of times to run the generated code for taking average. We call these runs as one `repeat` of measurement. repeat : int = 1 The number of times to repeat the measurement. In total, the generated code will be run (1 + number x repeat) times, where the first "1" is warm up and will be discarded. The returned result contains `repeat` costs, each of which is an average of `number` costs. min_repeat_ms : int = 0 The minimum duration of one `repeat` in milliseconds. By default, one `repeat` contains `number` runs. If this parameter is set, the parameters `number` will be dynamically adjusted to meet the minimum duration requirement of one `repeat`. i.e., When the run time of one `repeat` falls below this time, the `number` parameter will be automatically increased. cooldown_interval : float = 0.0 The cool down interval between two measurements. enable_cpu_cache_flush: bool = False Whether to flush cache on CPU between repeated measurements. Flushing cache can make the measured latency of one operator closer to its actual latency during end-to-end inference. To make this option effective, the argument `number` should also be set to 1. This is only has effect on CPU task. """ def __init__( self, priority=1, n_parallel=1, timeout=10, number=3, repeat=1, min_repeat_ms=0, cooldown_interval=0.0, enable_cpu_cache_flush=False, ): # pylint: disable=import-outside-toplevel from tvm.rpc.tracker import Tracker from tvm.rpc.server import Server dev = tvm.device("cuda", 0) if dev.exist: cuda_arch = "sm_" + "".join(dev.compute_version.split(".")) set_cuda_target_arch(cuda_arch) host = "0.0.0.0" self.tracker = Tracker(host, port=9000, port_end=10000, silent=True) device_key = "$local$device$%d" % self.tracker.port self.server = Server( host, port=self.tracker.port, port_end=10000, key=device_key, use_popen=True, silent=True, tracker_addr=(self.tracker.host, self.tracker.port), ) self.runner = RPCRunner( device_key, host, self.tracker.port, priority, n_parallel, timeout, number, repeat, min_repeat_ms, cooldown_interval, enable_cpu_cache_flush, ) # Wait for the processes to start time.sleep(0.5) def __del__(self): # Close the tracker and server before exit self.tracker.terminate() self.server.terminate() time.sleep(0.5)