Example #1
0
    def adaptive_autorange(
            self,
            threshold: float = 0.1,
            *,
            min_run_time: float = 0.01,
            max_run_time: float = 10.0,
            callback: Optional[Callable[[int, float], NoReturn]] = None,
    ) -> common.Measurement:
        """Collect timing samples with a confidence-based stop condition.

        Each sample is one call to ``self._timer.timeit(block_size)``. The
        sampling itself is delegated to ``_threaded_measurement_loop``; this
        method only supplies the timing hook and the stop condition.

        The stop condition reports ``True`` only when more than three samples
        have been gathered *and* a ``Measurement`` built from those samples
        satisfies ``meets_confidence(threshold=threshold)``.

        Args:
            threshold: Forwarded to ``Measurement.meets_confidence``.
            min_run_time: Forwarded positionally to
                ``_threaded_measurement_loop``.
            max_run_time: Forwarded positionally to
                ``_threaded_measurement_loop``.
            callback: Forwarded to ``_threaded_measurement_loop``
                (presumably invoked once per sample -- confirm against the
                loop implementation).

        Returns:
            A ``Measurement`` wrapping the collected raw times.
        """
        # NOTE: the block size is estimated with a fixed 0.05 budget, not with
        # the ``min_run_time`` argument of this method.
        block_size = self._estimate_block_size(min_run_time=0.05)

        def as_measurement(samples: List[float]) -> common.Measurement:
            # Single place where raw samples are wrapped, shared by the stop
            # condition and the final return value.
            return common.Measurement(
                number_per_run=block_size,
                raw_times=samples,
                task_spec=self._task_spec
            )

        def measure_block() -> float:
            return self._timer.timeit(block_size)

        def is_confident(times: List[float]) -> bool:
            # Never evaluate confidence with three or fewer samples.
            if len(times) <= 3:
                return False
            return as_measurement(times).meets_confidence(threshold=threshold)

        collected = self._threaded_measurement_loop(
            block_size,
            measure_block,
            is_confident,
            min_run_time,
            max_run_time,
            callback=callback,
        )

        return as_measurement(collected)
Example #2
0
    def blocked_autorange(
        self,
        callback: Optional[Callable[[int, float], NoReturn]] = None,
        min_run_time: float = 0.2,
    ) -> common.Measurement:
        """Measure many replicates while keeping timer overhead to a minimum.

        At a high level, blocked_autorange executes the following pseudo-code::

            `setup`

            total_time = 0
            while total_time < min_run_time:
                start = timer()
                for _ in range(block_size):
                    `stmt`
                total_time += (timer() - start)

        Note the variable `block_size` in the inner loop. The choice of block
        size is important to measurement quality, and must balance two
        competing objectives:

            1) A small block size results in more replicates and generally
               better statistics.

            2) A large block size better amortizes the cost of `timer`
               invocation, and results in a less biased measurement. This is
               important because CUDA synchronization time is non-trivial
               (order single to low double digit microseconds) and would
               otherwise bias the measurement.

        blocked_autorange sets block_size by running a warmup period,
        increasing block size until timer overhead is less than 0.1% of
        the overall computation. This value is then used for the main
        measurement loop.

        Args:
            callback: Passed through unchanged to the measurement loop.
            min_run_time: Used both when estimating the block size and as
                the ``min_run_time`` handed to the measurement loop.

        Returns:
            A `Measurement` object that contains measured runtimes and
            repetition counts, and can be used to compute statistics.
            (mean, median, etc.)
        """
        block_size = self._estimate_block_size(min_run_time)

        def measure_block() -> float:
            return self._timer.timeit(block_size)

        def always_stop(times: List[float]) -> bool:
            # No convergence criterion: stopping is always acceptable. How
            # this combines with ``min_run_time`` is decided inside
            # ``_threaded_measurement_loop``.
            return True

        samples = self._threaded_measurement_loop(
            block_size,
            measure_block,
            always_stop,
            min_run_time=min_run_time,
            callback=callback,
        )

        result = common.Measurement(
            number_per_run=block_size,
            raw_times=samples,
            task_spec=self._task_spec,
        )
        return result
Example #3
0
 def stop_hook(times: List[float]) -> bool:
     """Report whether the samples gathered so far meet the confidence threshold.

     Relies on ``number``, ``threshold`` and ``self`` from the enclosing scope.
     """
     # Three or fewer samples are never considered sufficient.
     if len(times) <= 3:
         return False
     candidate = common.Measurement(
         number_per_run=number,
         raw_times=times,
         task_spec=self._task_spec
     )
     return candidate.meets_confidence(threshold=threshold)
Example #4
0
    def timeit(self, number=1000000):
        """Time ``number`` executions after a short warmup and wrap the result.

        Runs inside ``common.set_torch_threads`` configured with the task
        spec's ``num_threads``. The warmup run uses ``number // 100``
        iterations, but never fewer than one, and its timing is discarded.

        Returns:
            A ``common.Measurement`` holding a single raw time.
        """
        thread_count = self._task_spec.num_threads
        with common.set_torch_threads(thread_count):
            # Warmup: result intentionally ignored.
            warmup_iterations = max(int(number // 100), 1)
            self._timer.timeit(number=warmup_iterations)

            elapsed = self._timer.timeit(number=number)
            return common.Measurement(
                number_per_run=number,
                raw_times=[elapsed],
                task_spec=self._task_spec,
            )
Example #5
0
    def timeit(self, number: int = 1000000) -> common.Measurement:
        """Mirrors the semantics of timeit.Timer.timeit().

        Execute the main statement (`stmt`) `number` times.
        https://docs.python.org/3/library/timeit.html#timeit.Timer.timeit

        A warmup run of ``number // 100`` iterations (at least two) is
        performed first and its timing is discarded. Both runs happen inside
        ``common.set_torch_threads`` configured with the task spec's
        ``num_threads``.

        Returns:
            A ``common.Measurement`` holding a single raw time.
        """
        thread_count = self._task_spec.num_threads
        with common.set_torch_threads(thread_count):
            # Warmup: result intentionally ignored.
            warmup_iterations = max(int(number // 100), 2)
            self._timeit(number=warmup_iterations)

            elapsed = self._timeit(number=number)
            return common.Measurement(
                number_per_run=number,
                raw_times=[elapsed],
                task_spec=self._task_spec,
            )
Example #6
0
    def blocked_autorange(self, callback=None, min_run_time=0.2):
        """Collect block-timed samples and return them as a ``Measurement``.

        The block size comes from ``_estimate_block_size(min_run_time)``;
        each sample is one call to ``self._timer.timeit(block_size)``.
        Sampling is delegated to ``_threaded_measurement_loop``.

        Args:
            callback: Passed through unchanged to the measurement loop.
            min_run_time: Used both when estimating the block size and as
                the ``min_run_time`` handed to the measurement loop.

        Returns:
            A ``common.Measurement`` wrapping the collected raw times.
        """
        block_size = self._estimate_block_size(min_run_time)

        def measure_block() -> float:
            return self._timer.timeit(block_size)

        def always_stop(times) -> bool:
            # No convergence criterion: stopping is always acceptable. How
            # this combines with ``min_run_time`` is decided inside
            # ``_threaded_measurement_loop``.
            return True

        samples = self._threaded_measurement_loop(
            block_size,
            measure_block,
            always_stop,
            min_run_time=min_run_time,
            callback=callback,
        )

        result = common.Measurement(
            number_per_run=block_size,
            raw_times=samples,
            task_spec=self._task_spec,
        )
        return result