Ejemplo n.º 1
0
 def __init__(
     self,
     priority=1,
     n_parallel=1,
     timeout=10,
     number=3,
     repeat=1,
     min_repeat_ms=0,
     cooldown_interval=0.0,
     enable_cpu_cache_flush=False,
 ):
     """Start a local tracker and server, then build an RPCRunner that uses them.

     Every argument is forwarded positionally and unchanged to ``RPCRunner``,
     after the device key, the host and the tracker port.
     """
     # When CUDA device 0 exists, turn its compute version ("X.Y") into "sm_XY"
     # and register it as the CUDA target architecture.
     gpu = tvm.context("cuda", 0)
     if gpu.exist:
         set_cuda_target_arch("sm_" + gpu.compute_version.replace(".", ""))

     bind_addr = '0.0.0.0'
     tracker = Tracker(bind_addr, port=9000, port_end=10000, silent=True)
     self.tracker = tracker

     # The device key embeds the port reported by the tracker.
     local_key = '$local$device$%d' % tracker.port
     self.server = Server(
         bind_addr,
         port=tracker.port,
         port_end=10000,
         key=local_key,
         use_popen=True,
         silent=True,
         tracker_addr=(tracker.host, tracker.port),
     )

     measure_args = (
         priority, n_parallel, timeout, number, repeat,
         min_repeat_ms, cooldown_interval, enable_cpu_cache_flush,
     )
     self.runner = RPCRunner(local_key, bind_addr, tracker.port, *measure_args)

     # Give the freshly launched processes a moment to start.
     time.sleep(0.5)
Ejemplo n.º 2
0
class LocalRPCMeasureContext:
    """A context wrapper for running RPCRunner locally.

    This will launch a local RPC Tracker and local RPC Server. Both are
    terminated when the context object is finalized (see ``__del__``).

    TODO(FrozenGene): Add cpu cache flush to this RPC context.

    Parameters
    ----------
    priority : int = 1
        The priority of this run request; a larger value means higher priority.
    n_parallel : int = 1
        The number of tasks run in parallel.
    timeout : int = 10
        The timeout limit (in seconds) for each run.
        This is used in a wrapper of the multiprocessing.Process.join().
    number : int = 3
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int = 1
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms : int = 0
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameter `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., when the run time of one `repeat` falls below this time, the `number`
        parameter will be automatically increased.
    cooldown_interval : float = 0.0
        The cool down interval between two measurements.
    """

    def __init__(self, priority=1, n_parallel=1, timeout=10, number=3, repeat=1,
                 min_repeat_ms=0, cooldown_interval=0.0):
        # If CUDA device 0 exists, build "sm_XY" from its "X.Y" compute version
        # and register it as the CUDA target architecture.
        ctx = tvm.context("cuda", 0)
        if ctx.exist:
            cuda_arch = "sm_" + "".join(ctx.compute_version.split('.'))
            set_cuda_target_arch(cuda_arch)
        # NOTE(review): '0.0.0.0' is the wildcard address; presumably the tracker
        # and server listen on every interface -- confirm this is intended for a
        # context that is meant to be local.
        host = '0.0.0.0'
        self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
        # The device key embeds the port reported by the tracker.
        device_key = '$local$device$%d' % self.tracker.port
        self.server = Server(host, port=self.tracker.port, port_end=10000,
                             key=device_key, use_popen=True, silent=True,
                             tracker_addr=(self.tracker.host, self.tracker.port))
        self.runner = RPCRunner(device_key, host, self.tracker.port, priority,
                                n_parallel, timeout, number, repeat,
                                min_repeat_ms, cooldown_interval)
        # Wait for the processes to start
        time.sleep(0.5)

    def __del__(self):
        """Terminate the tracker and the server, if they were created.

        ``__init__`` can fail before ``self.tracker`` or ``self.server`` is
        assigned, and ``__del__`` still runs on such a partially built object,
        so each attribute is looked up defensively instead of assuming it exists.
        """
        tracker = getattr(self, "tracker", None)
        server = getattr(self, "server", None)
        try:
            if tracker is not None:
                tracker.terminate()
        finally:
            # Always attempt to stop the server, even if stopping the tracker raised.
            if server is not None:
                server.terminate()
Ejemplo n.º 3
0
    def __init__(
        self,
        priority=1,
        n_parallel=1,
        timeout=10,
        number=3,
        repeat=1,
        min_repeat_ms=0,
        cooldown_interval=0.0,
        enable_cpu_cache_flush=False,
    ):
        """Start a local tracker and server, then build an RPCRunner that uses them.

        Every argument is forwarded positionally and unchanged to ``RPCRunner``,
        after the device key, the host and the tracker port.
        """
        # Imported here rather than at module level.
        # pylint: disable=import-outside-toplevel
        from tvm.rpc.tracker import Tracker
        from tvm.rpc.server import Server

        # When CUDA device 0 exists, turn its compute version ("X.Y") into "sm_XY"
        # and register it as the CUDA target architecture.
        gpu = tvm.device("cuda", 0)
        if gpu.exist:
            set_cuda_target_arch("sm_" + gpu.compute_version.replace(".", ""))

        bind_addr = "0.0.0.0"
        tracker = Tracker(bind_addr, port=9000, port_end=10000, silent=True)
        self.tracker = tracker

        # The device key embeds the port reported by the tracker.
        local_key = "$local$device$%d" % tracker.port
        self.server = Server(
            bind_addr, port=tracker.port, port_end=10000,
            key=local_key, use_popen=True, silent=True,
            tracker_addr=(tracker.host, tracker.port),
        )

        measure_args = (
            priority, n_parallel, timeout, number, repeat,
            min_repeat_ms, cooldown_interval, enable_cpu_cache_flush,
        )
        self.runner = RPCRunner(local_key, bind_addr, tracker.port, *measure_args)

        # Give the freshly launched processes a moment to start.
        time.sleep(0.5)
Ejemplo n.º 4
0
Archivo: measure.py Proyecto: were/tvm
    def __init__(
        self,
        priority=1,
        n_parallel=1,
        timeout=10,
        number=3,
        repeat=1,
        min_repeat_ms=0,
        cooldown_interval=0.0,
        enable_cpu_cache_flush=False,
        device=0,
    ):
        """Start a local tracker and server, then build an RPCRunner that uses them.

        Every argument is forwarded positionally and unchanged to ``RPCRunner``,
        after the device key, the loopback address and the tracker port.
        """
        # Imported here rather than at module level.
        # pylint: disable=import-outside-toplevel
        from tvm.rpc.tracker import Tracker
        from tvm.rpc.server import Server

        loopback = "127.0.0.1"

        tracker = Tracker(port=9000, port_end=10000, silent=True)
        self.tracker = tracker

        # The device key embeds the port reported by the tracker.
        local_key = "$local$device$%d" % tracker.port
        self.server = Server(
            port=tracker.port, port_end=10000, key=local_key, silent=True,
            tracker_addr=(loopback, tracker.port),
        )

        measure_args = (
            priority, n_parallel, timeout, number, repeat,
            min_repeat_ms, cooldown_interval, enable_cpu_cache_flush, device,
        )
        self.runner = RPCRunner(local_key, loopback, tracker.port, *measure_args)

        # Give the freshly launched processes a moment to start.
        time.sleep(0.5)
Ejemplo n.º 5
0
 def __init__(
     self,
     tracker_key: str = "key",
     silent: bool = False,
     no_fork: bool = False,
 ) -> None:
     """Create a tracker and a server that is handed the tracker's address.

     The tracker's host and port, plus ``tracker_key``, are stored on the
     instance as ``tracker_host``, ``tracker_port`` and ``tracker_key``.
     """
     # Both the tracker and the server are given the same port range.
     first_port, last_port = 9190, 12345

     tracker = Tracker(port=first_port, port_end=last_port, silent=silent)
     self.tracker = tracker

     self.server = Server(
         host="0.0.0.0",
         port=first_port,
         port_end=last_port,
         key=tracker_key,
         tracker_addr=(tracker.host, tracker.port),
         is_proxy=False,
         silent=silent,
         no_fork=no_fork,
     )

     self.tracker_host, self.tracker_port = tracker.host, tracker.port
     self.tracker_key = tracker_key
Ejemplo n.º 6
0
class LocalRPCMeasureContext:
    """A context wrapper for running RPCRunner locally.

    This will launch a local RPC Tracker and local RPC Server. Both are
    terminated when the context object is finalized (see ``__del__``).

    Parameters
    ----------
    priority : int = 1
        The priority of this run request; a larger value means higher priority.
    n_parallel : int = 1
        The number of tasks run in parallel.
    timeout : int = 10
        The timeout limit (in seconds) for each run.
        This is used in a wrapper of the multiprocessing.Process.join().
    number : int = 3
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int = 1
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms : int = 0
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameter `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., when the run time of one `repeat` falls below this time, the `number`
        parameter will be automatically increased.
    cooldown_interval : float = 0.0
        The cool down interval between two measurements.
    enable_cpu_cache_flush: bool = False
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has an effect on CPU tasks.
    """

    def __init__(
        self,
        priority=1,
        n_parallel=1,
        timeout=10,
        number=3,
        repeat=1,
        min_repeat_ms=0,
        cooldown_interval=0.0,
        enable_cpu_cache_flush=False,
    ):
        # pylint: disable=import-outside-toplevel
        from tvm.rpc.tracker import Tracker
        from tvm.rpc.server import Server

        # If CUDA device 0 exists, build "sm_XY" from its "X.Y" compute version
        # and register it as the CUDA target architecture.
        dev = tvm.device("cuda", 0)
        if dev.exist:
            cuda_arch = "sm_" + "".join(dev.compute_version.split("."))
            set_cuda_target_arch(cuda_arch)
        # NOTE(review): "0.0.0.0" is the wildcard address; presumably the tracker
        # and server listen on every interface -- confirm this is intended for a
        # context that is meant to be local.
        host = "0.0.0.0"
        self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
        # The device key embeds the port reported by the tracker.
        device_key = "$local$device$%d" % self.tracker.port
        self.server = Server(
            host,
            port=self.tracker.port,
            port_end=10000,
            key=device_key,
            use_popen=True,
            silent=True,
            tracker_addr=(self.tracker.host, self.tracker.port),
        )
        self.runner = RPCRunner(
            device_key,
            host,
            self.tracker.port,
            priority,
            n_parallel,
            timeout,
            number,
            repeat,
            min_repeat_ms,
            cooldown_interval,
            enable_cpu_cache_flush,
        )
        # Wait for the processes to start
        time.sleep(0.5)

    def __del__(self):
        """Terminate the tracker and the server, if they were created.

        ``__init__`` can fail before ``self.tracker`` or ``self.server`` is
        assigned, and ``__del__`` still runs on such a partially built object,
        so each attribute is looked up defensively instead of assuming it exists.
        """
        tracker = getattr(self, "tracker", None)
        server = getattr(self, "server", None)
        try:
            if tracker is not None:
                tracker.terminate()
        finally:
            # Always attempt to stop the server, even if stopping the tracker raised.
            if server is not None:
                server.terminate()
        # Pause after termination only when something was actually terminated;
        # a never-started context has nothing to wait for.
        if tracker is not None or server is not None:
            time.sleep(0.5)