コード例 #1
0
    def _invoke(
        self,
        task_spec: common.TaskSpec,
        globals: Dict[str, Any],
        number: int,
        is_python: bool,
    ) -> Tuple[FunctionCounts, FunctionCounts]:
        """Core invocation method for Callgrind collection.

        Valgrind operates by effectively replacing the CPU with an emulated
        version which allows it to instrument any code at the cost of severe
        performance degradation. This has the practical effect that in order
        to collect Callgrind statistics, a new process has to be created
        running under `valgrind`. The steps for this process are:

        1) Create a scratch directory.
        2) Codegen a run script. (_ValgrindWrapper._construct_script)
            Inside the run script:
                * Validate that Python and torch match the parent process
                * Validate that it is indeed running under valgrind
                * Execute `setup` and warm up `stmt`
                * Begin collecting stats
                * Run the `stmt` loop
                * Stop collecting stats
        3) Parse the run results.
        4) Cleanup the scratch directory.

        Args:
            task_spec: Task description. It is forwarded to
                `_construct_script` on the Python path; on the other path its
                `stmt`, `setup` and `num_threads` fields are used.
            globals: Wrapped in a `GlobalsBridge` (together with the scratch
                data directory) and handed to `_construct_script`. Only used
                when `is_python` is true.
            number: Passed to the generated script, or as `--number` to the
                compiled executable (which also gets
                `--number_warmup min(number, 10)`).
            is_python: When true a Python script is generated and run with
                `python`; otherwise an executable is built with
                `cpp_jit.compile_callgrind_template` and run directly.

        Returns:
            A pair `(inclusive, exclusive)` of `FunctionCounts`, parsed from
            `callgrind_annotate --inclusive=yes` and `--inclusive=no`.

        Raises:
            OSError: If the `valgrind` process exits with a non-zero code.
                The message carries the contents of the error log when it
                exists and is non-empty, otherwise the captured output.
            subprocess.CalledProcessError: If `callgrind_annotate` fails
                (it is run with `check=True`).
        """
        working_dir = tempfile.mkdtemp()
        data_dir = os.path.join(working_dir, "data")
        script_file = os.path.join(working_dir, "timer_callgrind.py")
        callgrind_out = os.path.join(working_dir, "callgrind.out")
        error_log = os.path.join(working_dir, "error.txt")
        stat_log = os.path.join(working_dir, "callgrind_stat.txt")
        stdout_stderr_log = os.path.join(working_dir, "stdout_stderr.log")

        def run(args: List[str],
                **kwargs: Any) -> Tuple[CompletedProcessType, str]:
            """Run `args`, returning the completed process and its output.

            stdout and stderr are merged and redirected to a log file instead
            of a pipe; see:
            https://thraxil.org/users/anders/posts/2008/03/13/Subprocess-Hanging-PIPE-is-your-enemy/
            """
            with open(stdout_stderr_log, "wb") as f_stdout_stderr:
                invocation = subprocess.run(
                    args,
                    stdout=f_stdout_stderr,
                    stderr=subprocess.STDOUT,
                    **kwargs,
                )
                # The child has exited, so the log is complete on disk.
                with open(stdout_stderr_log, "rt") as f:
                    return invocation, f.read()

        try:
            if is_python:
                if self._bindings_module is not None:
                    # Place a copy of the bindings module next to the script.
                    shutil.copy(
                        self._bindings_module.__file__,
                        os.path.join(
                            working_dir,
                            os.path.split(self._bindings_module.__file__)[1]))

                with open(script_file, "wt") as f:
                    f.write(
                        self._construct_script(task_spec,
                                               globals=GlobalsBridge(
                                                   globals, data_dir),
                                               number=number,
                                               error_log=error_log,
                                               stat_log=stat_log,
                                               bindings=self._bindings_module))
                run_loop_cmd = ["python", script_file]
            else:
                run_loop_exec = cpp_jit.compile_callgrind_template(
                    task_spec.stmt,
                    task_spec.setup,
                )
                run_loop_cmd = [
                    run_loop_exec,
                    "--number",
                    str(number),
                    "--number_warmup",
                    str(min(number, 10)),
                    "--number_threads",
                    str(task_spec.num_threads),
                ]

            valgrind_invocation, valgrind_invocation_output = run([
                "valgrind",
                "--tool=callgrind",
                f"--callgrind-out-file={callgrind_out}",
                "--dump-line=yes",
                "--dump-instr=yes",
                "--instr-atstart=yes",
                "--collect-atstart=no",
            ] + run_loop_cmd)

            if valgrind_invocation.returncode:
                # Prefer the error log; fall back to the raw process output.
                error_report = ""
                if os.path.exists(error_log):
                    with open(error_log, "rt") as f:
                        error_report = f.read()
                if not error_report:
                    error_report = "Unknown error.\n" + valgrind_invocation_output

                raise OSError(
                    f"Failed to collect callgrind profile:\n{error_report}")

            header_pattern = re.compile(r"Ir\s+file:function")
            count_pattern = re.compile(r"^\s*([0-9,]+)\s+(.+:.+)$")
            separator_pattern = re.compile(r"-+")

            def parse_output(inclusive: bool) -> FunctionCounts:
                """Annotate `callgrind_out` and parse the per-function table."""
                _, annotate_invocation_output = run(
                    [
                        "callgrind_annotate",
                        f"--inclusive={'yes' if inclusive else 'no'}",
                        callgrind_out
                    ],
                    check=True)

                in_table = False
                fn_counts = []
                for line in annotate_invocation_output.splitlines(
                        keepends=False):
                    if not in_table:
                        # Rows are only meaningful after the table header;
                        # anything else that merely looks like a count row
                        # must not be recorded.
                        if header_pattern.match(line):
                            in_table = True
                        continue

                    count_match = count_pattern.match(line)
                    if count_match:
                        ir_str, file_function = count_match.groups()
                        ir = int(ir_str.replace(",", ""))
                        fn_counts.append(FunctionCount(ir, file_function))
                        continue

                    if separator_pattern.match(line):
                        # Ignore heading separator lines.
                        continue

                    # Any other line ends the current table.
                    in_table = False

                return FunctionCounts(tuple(sorted(fn_counts, reverse=True)),
                                      inclusive=inclusive)

            return parse_output(inclusive=True), parse_output(inclusive=False)
        finally:
            shutil.rmtree(working_dir)
コード例 #2
0
    def _invoke(
        self,
        *,
        task_spec: common.TaskSpec,
        globals: Dict[str, Any],
        number: int,
        repeats: int,
        collect_baseline: bool,
        is_python: bool,
        retain_out_file: bool,
    ) -> Tuple[Tuple[FunctionCounts, FunctionCounts, Optional[str]], ...]:
        """Run the task under `valgrind --tool=callgrind` and parse the stats.

        Valgrind emulates the CPU so that it can instrument arbitrary code,
        which is very slow; collection therefore happens in a fresh process
        started under `valgrind`. The overall flow is:

        1) Create a scratch directory.
        2) Codegen a run script. (_ValgrindWrapper._construct_script)
            Inside the run script:
                * Validate that Python and torch match the parent process
                * Validate that it is indeed running under valgrind
                * Execute `setup` and warm up `stmt`
                * Begin collecting stats
                * Run the `stmt` loop
                * Stop collecting stats
        3) Parse the run results.
        4) Cleanup the scratch directory.

        Returns:
            One `(inclusive counts, exclusive counts, raw out-file text)`
            tuple for each index in `range(repeats + 1)`. The raw text is
            `None` unless `retain_out_file` is set. When `collect_baseline`
            is false the final entry is an empty placeholder.

        Raises:
            OSError: If the `valgrind` process returns a non-zero exit code.
        """
        scratch_dir = common._make_temp_dir(prefix="callgrind")

        def in_scratch(name: str) -> str:
            # Path of `name` inside the scratch directory.
            return os.path.join(scratch_dir, name)

        data_dir = in_scratch("data")
        script_file = in_scratch("timer_callgrind.py")
        callgrind_out = in_scratch("callgrind.out")
        error_log = in_scratch("error.txt")
        stat_log = in_scratch("callgrind_stat.txt")
        stdout_stderr_log = in_scratch("stdout_stderr.log")

        def execute(cmd: List[str],
                    **run_kwargs: Any) -> Tuple[CompletedProcessType, str]:
            # Output is sent to a file rather than a pipe, see:
            # https://thraxil.org/users/anders/posts/2008/03/13/Subprocess-Hanging-PIPE-is-your-enemy/
            with open(stdout_stderr_log, "wb") as sink:
                completed = subprocess.run(
                    cmd,
                    stdout=sink,
                    stderr=subprocess.STDOUT,
                    **run_kwargs,
                )
                with open(stdout_stderr_log, "rt") as captured:
                    output = captured.read()
            return completed, output

        try:
            if not is_python:
                assert not collect_baseline
                compiled_binary = cpp_jit.compile_callgrind_template(
                    stmt=task_spec.stmt,
                    setup=task_spec.setup,
                    global_setup=task_spec.global_setup,
                )
                run_loop_cmd = [
                    compiled_binary,
                    "--number", str(number),
                    "--number_warmup", str(min(number, 10)),
                    "--repeats", str(repeats),
                    "--number_threads", str(task_spec.num_threads),
                ]
            else:
                bindings = self._bindings_module
                if bindings is not None:
                    # Copy the bindings module file into the scratch dir.
                    bindings_path = bindings.__file__
                    shutil.copy(
                        bindings_path,
                        in_scratch(os.path.split(bindings_path)[1]))

                with open(script_file, "wt") as script:
                    script_source = self._construct_script(
                        task_spec,
                        globals=GlobalsBridge(globals, data_dir),
                        number=number,
                        repeats=repeats,
                        collect_baseline=collect_baseline,
                        error_log=error_log,
                        stat_log=stat_log,
                        bindings=self._bindings_module)
                    script.write(script_source)

                run_loop_cmd = ["python", script_file]

            valgrind_cmd = [
                "valgrind",
                "--tool=callgrind",
                f"--callgrind-out-file={callgrind_out}",
                "--dump-line=yes",
                "--dump-instr=yes",
                "--instr-atstart=yes",
                "--collect-atstart=no",
                *run_loop_cmd,
            ]
            valgrind_result, valgrind_output = execute(valgrind_cmd)

            if valgrind_result.returncode:
                # Use the error log when it has content, else the raw output.
                details = ""
                if os.path.exists(error_log):
                    with open(error_log, "rt") as log:
                        details = log.read()
                if not details:
                    details = "Unknown error.\n" + valgrind_output

                raise OSError(
                    f"Failed to collect callgrind profile:\n{details}")

            def annotate(fpath: str, inclusive: bool) -> FunctionCounts:
                # Run callgrind_annotate on `fpath` and parse its table.
                inclusive_flag = f"--inclusive={'yes' if inclusive else 'no'}"
                _, report = execute(
                    [
                        "callgrind_annotate",
                        inclusive_flag,
                        "--threshold=100", "--show-percs=no", fpath
                    ],
                    check=True)

                total_re = re.compile(r"^([0-9,]+)\s+PROGRAM TOTALS")
                header_re = re.compile(r"Ir\s+file:function")
                row_re = re.compile(r"^\s*([0-9,]+)\s+(.+:.+)$")

                # The three loops share one iterator, so each phase resumes
                # on the line after the one that ended the previous phase.
                remaining = iter(report.splitlines(keepends=False))

                # Phase 1: locate the program totals line.
                program_totals = None
                for line in remaining:
                    totals = total_re.match(line)
                    if totals is not None:
                        program_totals = int(totals.group(1).replace(",", ""))
                        break

                # Phase 2: locate the function table header.
                header_found = False
                for line in remaining:
                    if header_re.match(line):
                        header_found = True
                        break

                # Phase 3: read table rows until the first foreign line.
                fn_counts = []
                for line in remaining:
                    row = row_re.match(line)
                    if row is None:
                        if re.match(r"-+", line):
                            # Ignore heading separator lines.
                            continue
                        break

                    ir_str, file_function = row.groups()
                    ir = int(ir_str.replace(",", ""))
                    # Callgrind includes some top level red herring symbols
                    # when a program dumps multiple profiles; they carry the
                    # program total and are skipped.
                    if ir != program_totals:
                        fn_counts.append(FunctionCount(ir, file_function))

                assert header_found, f"Failed to parse {fpath}"
                fn_counts.sort(reverse=True)
                return FunctionCounts(tuple(fn_counts), inclusive=inclusive)

            def collect(
                index: int
            ) -> Tuple[FunctionCounts, FunctionCounts, Optional[str]]:
                baseline_skipped = (index == repeats and not collect_baseline)
                if baseline_skipped:
                    # Null baseline.
                    empty_inclusive = FunctionCounts((), inclusive=True)
                    empty_exclusive = FunctionCounts((), inclusive=False)
                    return empty_inclusive, empty_exclusive, None

                # Callgrind one-indexes files.
                fpath = f"{callgrind_out}.{index + 1}"

                raw_contents: Optional[str] = None
                if retain_out_file:
                    with open(fpath, "rt") as out_file:
                        raw_contents = out_file.read()

                inclusive_counts = annotate(fpath, inclusive=True)
                exclusive_counts = annotate(fpath, inclusive=False)
                return inclusive_counts, exclusive_counts, raw_contents

            return tuple(map(collect, range(repeats + 1)))
        finally:
            shutil.rmtree(scratch_dir)