Example #1
0
    def test_collect_callgrind(self):
        """End-to-end checks for `Timer.collect_callgrind`.

        Covers: rejection of unwrapped globals, a subprocess pickle failure
        surfacing as OSError, successful collection with `CopyIfCallgrind`
        wrapped globals, and repeatability across `repeats`.
        """
        # Globals must be explicitly wrapped so that serialization across the
        # Valgrind subprocess boundary is opt-in.
        with self.assertRaisesRegex(
                ValueError,
                r"`collect_callgrind` requires that globals be wrapped "
                r"in `CopyIfCallgrind` so that serialization is explicit."):
            benchmark_utils.Timer("pass", globals={
                "x": 1
            }).collect_callgrind(collect_baseline=False)

        with self.assertRaisesRegex(
                # Subprocess raises AttributeError (from pickle),
                # _ValgrindWrapper re-raises as generic OSError.
                OSError,
                "AttributeError: Can't get attribute 'MyModule'"):
            benchmark_utils.Timer("model(1)",
                                  globals={
                                      "model":
                                      benchmark_utils.CopyIfCallgrind(
                                          MyModule())
                                  }).collect_callgrind(collect_baseline=False)

        @torch.jit.script
        def add_one(x):
            return x + 1

        timer = benchmark_utils.Timer(
            "y = add_one(x) + k",
            setup="x = torch.ones((1,))",
            globals={
                "add_one":
                benchmark_utils.CopyIfCallgrind(add_one),
                "k":
                benchmark_utils.CopyIfCallgrind(5),
                "model":
                benchmark_utils.CopyIfCallgrind(MyModule(),
                                                setup=f"""\
                    import sys
                    sys.path.append({repr(os.path.split(os.path.abspath(__file__))[0])})
                    from test_benchmark_utils import MyModule
                    """)
            })

        stats = timer.collect_callgrind(number=1000)
        counts = stats.counts(denoise=False)

        self.assertIsInstance(counts, int)
        self.assertGreater(counts, 0)

        stats = timer.collect_callgrind(number=1000, repeats=10)
        assert isinstance(stats, tuple)

        # Check that the repeats are at least somewhat repeatable.
        counts = collections.Counter([s.counts(denoise=True) for s in stats])
        self.assertGreater(
            # BUG FIX: `counts.values` (the bound method object) was passed to
            # `max`, which raises TypeError; the values *view* is required.
            max(counts.values()), 1,
            f"Every instruction count total was unique: {counts}")

        from torch.utils.benchmark.utils.valgrind_wrapper.timer_interface import wrapper_singleton
        self.assertIsNone(wrapper_singleton()._bindings_module,
                          "JIT'd bindings are only for back testing.")
Example #2
0
    def collect_callgrind(self, number=100, collect_baseline=True):
        """Collect instruction counts via Callgrind.

        Validates `stmt` and the globals in the parent process, then
        delegates the actual collection to the Valgrind wrapper singleton.
        """
        stmt = self._task_spec.stmt
        if not isinstance(stmt, str):
            raise ValueError(
                "`collect_callgrind` currently only supports string `stmt`")

        # __init__ adds torch, and Timer adds __builtins__
        permitted = {"torch", "__builtins__"}
        extra_globals = set(self._globals) - permitted
        if extra_globals:
            raise ValueError(
                "`collect_callgrind` does not currently support passing globals. "
                "Please define a `setup` str instead.")

        if self._globals.get("torch", torch) is not torch:
            raise ValueError(
                "`collect_callgrind` does not support mocking out `torch`.")

        # Check that the statement is valid. It doesn't guarantee success, but it's much
        # simpler and quicker to raise an exception for a faulty `stmt` or `setup` in
        # the parent process rather than the valgrind subprocess.
        self._timer.timeit(1)

        wrapper = valgrind_timer_interface.wrapper_singleton()
        return wrapper.collect_callgrind(
            stmt=stmt,
            setup=self._task_spec.setup,
            number=number,
            num_threads=self._task_spec.num_threads,
            collect_baseline=collect_baseline)
Example #3
0
    def collect_callgrind(
        self,
        number: int = 100,
        *,
        collect_baseline: bool = True,
        retain_out_file: bool = False,
    ) -> valgrind_timer_interface.CallgrindStats:
        """Collect instruction counts using Callgrind.

        Unlike wall times, instruction counts are deterministic
        (modulo non-determinism in the program itself and small amounts of
        jitter from the Python interpreter.) This makes them ideal for detailed
        performance analysis. This method runs `stmt` in a separate process
        so that Valgrind can instrument the program. Performance is severely
        degraded due to the instrumentation, however this is ameliorated by
        the fact that a small number of iterations is generally sufficient to
        obtain good measurements.

        In order to use this method `valgrind`, `callgrind_control`, and
        `callgrind_annotate` must be installed.

        Because there is a process boundary between the caller (this process)
        and the `stmt` execution, `globals` cannot contain arbitrary in-memory
        data structures. (Unlike timing methods) Instead, globals are
        restricted to builtins, `nn.Modules`'s, and TorchScripted functions/modules
        to reduce the surprise factor from serialization and subsequent
        deserialization. The `GlobalsBridge` class provides more detail on this
        subject. Take particular care with nn.Modules: they rely on pickle and
        you may need to add an import to `setup` for them to transfer properly.

        By default, a profile for an empty statement will be collected and
        cached to indicate how many instructions are from the Python loop which
        drives `stmt`.

        Args:
            number: Times `stmt` is executed per collection.
            collect_baseline: Whether to also profile an empty statement
                (only meaningful for Python timers; forced off for C++).
            retain_out_file: Keep the raw callgrind output file rather than
                deleting it after parsing.

        Returns:
            A `CallgrindStats` object which provides instruction counts and
            some basic facilities for analyzing and manipulating results.
        """
        if not isinstance(self._task_spec.stmt, str):
            raise ValueError(
                "`collect_callgrind` currently only supports string `stmt`")

        # Check that the statement is valid. It doesn't guarantee success, but it's much
        # simpler and quicker to raise an exception for a faulty `stmt` or `setup` in
        # the parent process rather than the valgrind subprocess.
        self._timer.timeit(1)
        is_python = (self._language == Language.PYTHON)
        # C++ timers carry no Python globals, and the Python-loop baseline
        # does not apply to them — hence `collect_baseline and is_python`.
        assert is_python or not self._globals
        return valgrind_timer_interface.wrapper_singleton().collect_callgrind(
            task_spec=self._task_spec,
            globals=self._globals,
            number=number,
            collect_baseline=collect_baseline and is_python,
            is_python=is_python,
            retain_out_file=retain_out_file,
        )
Example #4
0
    def test_collect_callgrind(self):
        """End-to-end checks for `Timer.collect_callgrind`.

        Covers: rejection of unwrapped globals, a subprocess pickle failure
        surfacing as OSError, successful collection with `CopyIfCallgrind`
        wrapped globals, and repeatability across `repeats`.
        """
        # Globals must be explicitly wrapped so that serialization across the
        # Valgrind subprocess boundary is opt-in.
        with self.assertRaisesRegex(
            ValueError,
            r"`collect_callgrind` requires that globals be wrapped "
            r"in `CopyIfCallgrind` so that serialization is explicit."
        ):
            benchmark_utils.Timer(
                "pass",
                globals={"x": 1}
            ).collect_callgrind(collect_baseline=False)

        with self.assertRaisesRegex(
            # Subprocess raises AttributeError (from pickle),
            # _ValgrindWrapper re-raises as generic OSError.
            OSError, "AttributeError: Can't get attribute 'MyModule'"
        ):
            benchmark_utils.Timer(
                "model(1)",
                globals={"model": benchmark_utils.CopyIfCallgrind(MyModule())}
            ).collect_callgrind(collect_baseline=False)

        @torch.jit.script
        def add_one(x):
            return x + 1

        timer = benchmark_utils.Timer(
            "y = add_one(x) + k",
            setup="x = torch.ones((1,))",
            globals={
                "add_one": benchmark_utils.CopyIfCallgrind(add_one),
                "k": benchmark_utils.CopyIfCallgrind(5),
                "model": benchmark_utils.CopyIfCallgrind(
                    MyModule(),
                    setup=f"""\
                    import sys
                    sys.path.append({repr(os.path.split(os.path.abspath(__file__))[0])})
                    from test_benchmark_utils import MyModule
                    """
                )
            }
        )

        # Don't collect baseline to speed up unit test by ~30 seconds.
        stats = timer.collect_callgrind(number=1000, collect_baseline=False)
        counts = stats.counts(denoise=False)

        self.assertIsInstance(counts, int)
        self.assertGreater(counts, 0)

        # BUG FIX: a bad merge left orphan argument lines ("x += 1", setup=...)
        # dangling after an assertion, which is a syntax error. They are
        # restored here as the Timer they belonged to, which drives the
        # repeatability check below. The duplicated wrapper_singleton check
        # that preceded them is dropped (it is repeated at the end).
        timer = benchmark_utils.Timer(
            "x += 1",
            setup="x = torch.ones((1,))",
        )

        stats = timer.collect_callgrind(number=1000, repeats=20)
        assert isinstance(stats, tuple)

        # Check that the repeats are at least somewhat repeatable. (within 10 instructions per iter)
        counts = collections.Counter(
            [s.counts(denoise=True) // 10_000 * 10_000 for s in stats])
        self.assertGreater(
            max(counts.values()), 1,
            f"Every instruction count total was unique: {counts}")

        from torch.utils.benchmark.utils.valgrind_wrapper.timer_interface import wrapper_singleton
        self.assertIsNone(wrapper_singleton()._bindings_module,
                          "JIT'd bindings are only for back testing.")

    @slowTest
    @unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
    @unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
    def test_collect_cpp_callgrind(self):
        """Instruction counts for a C++ snippet should be deterministic."""
        timer = benchmark_utils.Timer(
            "x += 1;",
            setup="torch::Tensor x = torch::ones({1});",
            timer=timeit.default_timer,
            language="c++",
        )
        stats = [timer.collect_callgrind() for _ in range(3)]
        counts = [s.counts() for s in stats]

        # BUG FIX: a bad merge spliced an unrelated Python-timer repeatability
        # block after the "# test reproducibility." comment, overwriting
        # `timer`/`stats` and leaving the collected C++ `counts` completely
        # unasserted. Assert on the C++ results instead.
        self.assertGreater(min(counts), 0, "No stats were collected")
        # Test reproducibility: C++ execution has no interpreter jitter, so
        # repeated collections are expected to match exactly.
        self.assertEqual(
            min(counts), max(counts),
            f"C++ Callgrind counts should be deterministic: {counts}")

        from torch.utils.benchmark.utils.valgrind_wrapper.timer_interface import wrapper_singleton
        self.assertIsNone(
            wrapper_singleton()._bindings_module,
            "JIT'd bindings are only for back testing."
        )

    @slowTest
    @unittest.skipIf(IS_WINDOWS, "Valgrind is not supported on Windows.")
    @unittest.skipIf(IS_SANDCASTLE, "Valgrind is OSS only.")
    @unittest.skipIf(True, "Failing on clang, see 74398")
    def test_collect_cpp_callgrind(self):
        timer = benchmark_utils.Timer(
            "x += 1;",
            setup="torch::Tensor x = torch::ones({1});",
            timer=timeit.default_timer,
            language="c++",
        )
        stats = [