Beispiel #1
0
    def command(self, outpath: Path) -> List[str]:
        """Build the clang argument list for this compilation job.

        The command starts with the clang binary followed by
        ``-c -emit-llvm -o <outpath>``, then the system library flags (only
        when ``self.system_includes`` is set), then the job's own arguments.

        :param outpath: The path passed to the compiler's ``-o`` flag.

        :return: The command line as a list of strings.
        """
        argv = [str(llvm.clang_path()), "-c", "-emit-llvm", "-o", str(outpath)]
        if self.system_includes:
            argv.extend(get_system_library_flags())
        argv.extend(str(arg) for arg in self.args)
        return argv
    def command(self, outpath: Path) -> List[str]:
        """Build the clang argument list for this compilation job.

        The command starts with the clang binary, followed by one
        ``-isystem <dir>`` pair per system include directory (only when
        ``self.system_includes`` is set), then the job's own arguments, and
        finally ``-c -emit-llvm -o <outpath>``.

        :param outpath: The path passed to the compiler's ``-o`` flag.

        :return: The command line as a list of strings.
        """
        argv = [str(llvm.clang_path())]
        if self.system_includes:
            argv.extend(
                token
                for directory in get_system_includes()
                for token in ("-isystem", str(directory))
            )

        argv.extend(str(arg) for arg in self.args)
        argv.extend(("-c", "-emit-llvm", "-o", str(outpath)))
        return argv
def test_make_benchmark_from_command_line_only_object_files(env: LlvmEnv):
    """A command line whose only input is an object file must be rejected."""
    with temporary_working_directory():
        Path("a.c").write_text("int A() { return 5; }")

        # Compile a.c to an object file:
        compile_cmd = [str(llvm_paths.clang_path()), "a.c", "-c"]
        subprocess.check_call(compile_cmd, timeout=60)
        assert Path("a.o").is_file()

        expected_error = "Input command line has no source file inputs"
        with pytest.raises(ValueError, match=expected_error):
            env.make_benchmark_from_command_line(["gcc", "a.o", "-c"])
def test_make_benchmark_from_command_line_build_cmd(env: LlvmEnv, cmd):
    """Check the build command recorded on a benchmark made from ``cmd``.

    The first four arguments must be the clang binary, ``-xir``, ``$IN`` and
    ``-o``, and the last argument must end with ``<cwd>/foo``.
    """
    with temporary_working_directory() as cwd:
        with open("in.c", "w") as source_file:
            source_file.write("int main() { return 0; }")

        bm = env.make_benchmark_from_command_line(cmd, system_includes=False)

        build_args = bm.proto.dynamic_config.build_cmd.argument
        expected_prefix = [str(llvm_paths.clang_path()), "-xir", "$IN", "-o"]
        assert build_args[:4] == expected_prefix
        assert build_args[-1].endswith(f"{cwd}/foo")
    def __init__(
        self,
        working_directory: Path,
        action_space: ActionSpace,
        benchmark: Benchmark,
        use_custom_opt: bool = True,
    ):
        """Start a compilation session for a benchmark.

        Writes the benchmark program to ``benchmark.c`` in the working
        directory and compiles it with clang to textual LLVM-IR
        (``benchmark.ll``) with the ``optnone`` attribute disabled.

        :param working_directory: Forwarded to the parent class constructor.

        :param action_space: The action space for this session.

        :param benchmark: The benchmark to compile. Its
            ``program.contents`` bytes are used as the C source.

        :param use_custom_opt: Whether to use the custom opt executable
            rather than LLVM's own opt (see the comment in the body).
        """
        super().__init__(working_directory, action_space, benchmark)
        logging.info("Started a compilation session for %s", benchmark.uri)
        self._benchmark = benchmark
        self._action_space = action_space

        self.inst2vec = _INST2VEC_ENCODER

        # Resolve the paths to LLVM binaries once now.
        self._clang = str(llvm.clang_path())
        self._llc = str(llvm.llc_path())
        self._llvm_diff = str(llvm.llvm_diff_path())
        self._opt = str(llvm.opt_path())
        # LLVM's opt does not always enforce the loop optimization options
        # passed as command line arguments. Hence, we created our own
        # executable with custom unrolling and vectorization passes in
        # examples/loops_opt_service/opt_loops that enforces the unrolling and
        # vectorization factors passed on its command line.
        # If self._use_custom_opt is true, use our custom executable,
        # otherwise use LLVM's opt.
        self._use_custom_opt = use_custom_opt

        # Dump the benchmark source to disk.
        source_file = self.working_dir / "benchmark.c"
        self._src_path = str(source_file)
        with open(source_file, "wb") as out:
            out.write(benchmark.program.contents)

        # Paths of the intermediate and final artifacts of this session.
        self._llvm_path = str(self.working_dir / "benchmark.ll")
        self._llvm_before_path = str(self.working_dir /
                                     "benchmark.previous.ll")
        self._obj_path = str(self.working_dir / "benchmark.o")
        self._exe_path = str(self.working_dir / "benchmark.exe")

        # Lower the C source to textual LLVM-IR.
        emit_ir_cmd = [
            self._clang,
            "-Xclang",
            "-disable-O0-optnone",
            "-emit-llvm",
            "-S",
            self._src_path,
            "-o",
            self._llvm_path,
        ]
        run_command(emit_ir_cmd, timeout=30)
Beispiel #6
0
 def preprocess(src: Path) -> bytes:
     """Run a C source file through the compiler's preprocessor.

     Invokes clang with ``-E``, adding the directory that contains
     ``NEURO_VECTORIZER_HEADER`` as an include path, followed by the system
     library flags.

     :param src: Path of the C source file to preprocess.

     :return: The bytes that clang wrote to stdout.
     """
     # TODO(github.com/facebookresearch/CompilerGym/issues/325): We can skip
     # this pre-processing, or do it on the service side, once support for
     # multi-file benchmarks lands.
     base_args = [
         str(llvm.clang_path()),
         "-E",
         "-o",
         "-",
         "-I",
         str(NEURO_VECTORIZER_HEADER.parent),
         src,
     ]
     clang_args = base_args + get_system_library_flags()
     return subprocess.check_output(clang_args, timeout=300)
def test_make_benchmark_from_command_line_mixed_source_and_object_files(
        env: LlvmEnv, retcode: int):
    """Check a command line that mixes a source file with a precompiled
    object file. The object file should be left out of the bitcode
    compilation but still be used for the final link.
    """
    with temporary_working_directory():
        Path("a.c").write_text("""
#include "b.h"

int A() {
    return B();
}

int main() {
    return A();
}
""")
        Path("b.c").write_text(f"int B() {{ return {retcode}; }}")
        Path("b.h").write_text("int B();")

        # Compile b.c to an object file:
        compile_cmd = [str(llvm_paths.clang_path()), "b.c", "-c"]
        subprocess.check_call(compile_cmd, timeout=60)
        assert Path("b.o").is_file()

        benchmark = env.make_benchmark_from_command_line(
            ["gcc", "a.c", "b.o", "-o", "foo"])
        env.reset(benchmark=benchmark)

        benchmark.compile(env)
        assert Path("foo").is_file()

        # main() returns A(), which returns B(), so the exit status of the
        # linked binary is the retcode baked into b.o.
        binary = subprocess.Popen(["./foo"])
        binary.communicate(timeout=60)
        assert binary.returncode == retcode
Beispiel #8
0
    def make_benchmark_from_command_line(
        self,
        cmd: Union[str, List[str]],
        replace_driver: bool = True,
        system_includes: bool = True,
        timeout: int = 600,
    ) -> Benchmark:
        """Create a benchmark for use with this environment.

        This function takes a command line compiler invocation as input,
        modifies it to produce an unoptimized LLVM-IR bitcode, and then runs the
        modified command line to produce a bitcode benchmark.

        For example, the command line:

            >>> benchmark = env.make_benchmark_from_command_line(
            ...     ["gcc", "-DNDEBUG", "a.c", "b.c", "-o", "foo", "-lm"]
            ... )

        Will compile a.c and b.c to an unoptimized benchmark that can be then
        passed to :meth:`reset() <compiler_gym.envs.CompilerEnv.reset>`.

        The way this works is to change the first argument of the command line
        invocation to the version of clang shipped with CompilerGym, and to then
        append command line flags that causes the compiler to produce LLVM-IR
        with optimizations disabled. For example the input command line:

        .. code-block::

            gcc -DNDEBUG a.c b.c -o foo -lm

        Will be rewritten to be roughly equivalent to:

        .. code-block::

            /path/to/compiler_gym/clang -DNDEBUG a.c b.c \\
                -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns \\
                -c -emit-llvm -o -

        Each source file is compiled separately, in the order reported by the
        parsed invocation (duplicates are compiled once). When there is more
        than one source, the resulting bitcodes are linked into a single
        bitcode in that same order.

        The generated benchmark then has a method :meth:`compile()
        <compiler_gym.envs.llvm.BenchmarkFromCommandLine.compile>` which
        completes the linking and compilation to executable. For the above
        example, this would be roughly equivalent to:

        .. code-block::

            /path/to/compiler_gym/clang environment-bitcode.bc -o foo -lm

        :param cmd: A command line compiler invocation, either as a list of
            arguments (e.g. :code:`["clang", "in.c"]`) or as a single shell
            string (e.g. :code:`"clang in.c"`).

        :param replace_driver: Whether to replace the first argument of the
            command with the clang driver used by this environment.

        :param system_includes: Whether to include the system standard libraries
            during compilation jobs. This requires a system toolchain. See
            :func:`get_system_library_flags`.

        :param timeout: The maximum number of seconds to allow the compilation
            job to run before terminating.

        :return: A :class:`BenchmarkFromCommandLine
            <compiler_gym.envs.llvm.BenchmarkFromCommandLine>` instance.

        :raises ValueError: If the command line is empty, has fewer than two
            arguments, reads from stdin, or contains no source file inputs.

        :raises BenchmarkInitError: If executing the command line fails.

        :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
            seconds.
        """
        if not cmd:
            raise ValueError("Input command line is empty")

        # Split the command line if passed a single string.
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)

        rewritten_cmd: List[str] = cmd.copy()

        if len(cmd) < 2:
            raise ValueError(
                f"Input command line '{join_cmd(cmd)}' is too short")

        # Append include flags for the system headers if requested.
        if system_includes:
            rewritten_cmd += get_system_library_flags()

        # Use the CompilerGym clang binary in place of the original driver.
        if replace_driver:
            rewritten_cmd[0] = str(clang_path())

        # Strip the -S flag, if present, as that changes the output format.
        rewritten_cmd = [c for c in rewritten_cmd if c != "-S"]

        # Parse the invocation before the output specifier is stripped below.
        invocation = GccInvocation(rewritten_cmd)

        # Strip the output specifier(s). This is not strictly required since we
        # override it later, but makes the generated command easier to
        # understand. Iterate backwards so that deleting a pair does not shift
        # the indices that are still to be visited.
        for i in range(len(rewritten_cmd) - 2, -1, -1):
            if rewritten_cmd[i] == "-o":
                del rewritten_cmd[i + 1]
                del rewritten_cmd[i]

        # Fail early.
        if "-" in invocation.sources:
            raise ValueError("Input command line reads from stdin, "
                             f"which is not supported: '{join_cmd(cmd)}'")

        # Convert all of the C/C++ sources to bitcodes which can then be linked
        # into a single bitcode. We must process them individually because the
        # '-c' flag does not support multiple sources when we are specifying the
        # output path using '-o'.
        #
        # Deduplicate with a dict rather than a set: dicts keep insertion
        # order, whereas the iteration order of a set of strings can change
        # from one interpreter run to the next, which would make the compile
        # and link order (and so the generated bitcode) non-reproducible.
        sources = dict.fromkeys(s for s in invocation.sources
                                if not s.endswith(".o"))

        if not sources:
            raise ValueError(
                f"Input command line has no source file inputs: '{join_cmd(cmd)}'"
            )

        bitcodes: List[bytes] = []
        for source in sources:
            # Adapt and execute the command line so that it will generate an
            # unoptimized bitcode file.
            emit_bitcode_command = rewritten_cmd.copy()

            # Strip the name of other sources:
            if len(sources) > 1:
                emit_bitcode_command = [
                    c for c in emit_bitcode_command
                    if c == source or c not in sources
                ]

            # Append the flags to emit the bitcode and disable the optimization
            # passes.
            emit_bitcode_command += [
                "-c",
                "-emit-llvm",
                "-o",
                "-",
                "-Xclang",
                "-disable-llvm-passes",
                "-Xclang",
                "-disable-llvm-optzns",
            ]

            with Popen(emit_bitcode_command,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE) as clang:
                logger.debug("Generating LLVM bitcode benchmark: %s",
                             join_cmd(emit_bitcode_command))
                bitcode, stderr = clang.communicate(timeout=timeout)
                if clang.returncode:
                    raise BenchmarkInitError(
                        f"Failed to generate LLVM bitcode with error:\n"
                        f"{stderr.decode('utf-8').rstrip()}\n"
                        f"Running command: {join_cmd(emit_bitcode_command)}\n"
                        f"From original commandline: {join_cmd(cmd)}")
                bitcodes.append(bitcode)

        # With a single source, its bitcode is the benchmark as-is.
        bitcode = bitcodes[0]

        # If there were multiple sources then link the bitcodes together.
        if len(bitcodes) > 1:
            with TemporaryDirectory(dir=transient_cache_path("."),
                                    prefix="llvm-benchmark-") as tmpdir:
                # Write the bitcodes to files.
                bitcode_paths = [
                    os.path.join(tmpdir, f"{i}.bc")
                    for i in range(len(bitcodes))
                ]
                for path, contents in zip(bitcode_paths, bitcodes):
                    with open(path, "wb") as f:
                        f.write(contents)

                # Link the bitcode files.
                llvm_link_cmd = [str(llvm_link_path()), "-o", "-"
                                 ] + bitcode_paths
                with Popen(llvm_link_cmd,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE) as llvm_link:
                    bitcode, stderr = llvm_link.communicate(timeout=timeout)
                    if llvm_link.returncode:
                        raise BenchmarkInitError(
                            f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                        )

        return BenchmarkFromCommandLine(invocation, bitcode, timeout)
Beispiel #9
0
def clang() -> Path:
    """Test fixture that provides the path of the clang binary."""
    clang_binary = llvm.clang_path()
    return clang_binary
Beispiel #10
0
def _compile_and_run_bitcode_file(
    bitcode_file: Path,
    cmd: str,
    cwd: Path,
    linkopts: List[str],
    env: Dict[str, str],
    num_runs: int,
    sanitizer: Optional[LlvmSanitizer] = None,
    timeout_seconds: float = 300,
    compilation_timeout_seconds: float = 60,
) -> BenchmarkExecutionResult:
    """Run the given cBench benchmark.

    When a sanitizer is requested, the bitcode is first compiled with clang to
    a native ``a.out`` binary in ``cwd`` and that binary is executed. Otherwise
    the run command executes ``benchmark.bc`` through ``lli``.

    :param bitcode_file: The bitcode to compile. Only read when
        ``sanitizer`` is set.

    :param cmd: The shell command used to run the benchmark. Every ``$BIN``
        in it is replaced with the thing to execute (``./a.out`` or
        ``lli benchmark.bc``).

    :param cwd: The directory that the benchmark is run in. The
        ``_finfo_dataset`` file and, for sanitizer runs, the ``a.out`` binary
        are written here.

    :param linkopts: Extra arguments appended to the clang compile command.
        Only used when ``sanitizer`` is set.

    :param env: Environment variables to add to (or override in) the
        barebones environment that the benchmark runs in.

    :param num_runs: The number of benchmark iterations, written to
        ``_finfo_dataset``.

    :param sanitizer: The sanitizer to build the binary with, if any.

    :param timeout_seconds: The maximum number of seconds to let the
        benchmark run. Also reported as the walltime of results that failed
        before or during execution timeouts.

    :param compilation_timeout_seconds: The maximum number of seconds to let
        the clang compilation run.

    :return: A result holding the walltime and decoded output on success, or
        a :code:`ValidationError` describing a compilation timeout,
        compilation failure, execution timeout, or non-zero exit status.
    """
    # cBench benchmarks expect that a file _finfo_dataset exists in the
    # current working directory and contains the number of benchmark
    # iterations in it.
    with open(cwd / "_finfo_dataset", "w") as f:
        print(num_runs, file=f)

    # Create a barebones execution environment for the benchmark.
    run_env = {
        "TMPDIR": os.environ.get("TMPDIR", ""),
        "HOME": os.environ.get("HOME", ""),
        "USER": os.environ.get("USER", ""),
        # Disable all logging from GRPC. In the past I have had false-positive
        # "Wrong output" errors caused by GRPC error messages being logged to
        # stderr.
        "GRPC_VERBOSITY": "NONE",
    }
    # Caller-supplied variables take precedence over the defaults above.
    run_env.update(env)

    # Accumulates diagnostic details that are attached to any error result.
    error_data = {}

    if sanitizer:
        clang_path = llvm.clang_path()
        binary = cwd / "a.out"
        error_data["run_cmd"] = cmd.replace("$BIN", "./a.out")
        # Generate the a.out binary file.
        # clang is invoked by its bare name; the PATH passed to Popen below
        # puts the directory of the resolved clang binary first.
        compile_cmd = ([clang_path.name,
                        str(bitcode_file), "-o",
                        str(binary)] + _COMPILE_ARGS + list(linkopts) +
                       _SANITIZER_FLAGS.get(sanitizer, []))
        error_data["compile_cmd"] = compile_cmd
        logger.debug("compile: %s", compile_cmd)
        assert not binary.is_file()
        # stderr is merged into stdout so that a single text stream holds all
        # of the compiler output.
        clang = subprocess.Popen(
            compile_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            env={"PATH": f"{clang_path.parent}:{os.environ.get('PATH', '')}"},
        )
        try:
            output, _ = clang.communicate(timeout=compilation_timeout_seconds)
        except subprocess.TimeoutExpired:
            # kill() was added in Python 3.7.
            # NOTE(review): the subprocess docs list Popen.kill() as
            # available well before 3.7, so this version check may be
            # unnecessary - confirm before simplifying.
            if sys.version_info >= (3, 7, 0):
                clang.kill()
            else:
                clang.terminate()
            clang.communicate(timeout=30)  # Wait for shutdown to complete.
            error_data["timeout"] = compilation_timeout_seconds
            return BenchmarkExecutionResult(
                walltime_seconds=timeout_seconds,
                error=ValidationError(
                    type="Compilation timeout",
                    data=error_data,
                ),
            )
        if clang.returncode:
            error_data["output"] = output
            return BenchmarkExecutionResult(
                walltime_seconds=timeout_seconds,
                error=ValidationError(
                    type="Compilation failed",
                    data=error_data,
                ),
            )
        assert binary.is_file()
    else:
        lli_path = llvm.lli_path()
        # NOTE(review): bitcode_file is not used on this path; the command
        # refers to "benchmark.bc" relative to cwd, so presumably the caller
        # has placed the bitcode there - confirm against the caller.
        error_data["run_cmd"] = cmd.replace("$BIN",
                                            f"{lli_path.name} benchmark.bc")
        # lli is invoked by its bare name, so PATH is replaced with the
        # directory that contains it.
        run_env["PATH"] = str(lli_path.parent)

    try:
        logger.debug("exec: %s", error_data["run_cmd"])
        process = subprocess.Popen(
            error_data["run_cmd"],
            shell=True,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            env=run_env,
            cwd=cwd,
        )

        # The timer wraps only the communicate() call, not process creation.
        with Timer() as timer:
            stdout, _ = process.communicate(timeout=timeout_seconds)
    except subprocess.TimeoutExpired:
        # kill() was added in Python 3.7.
        # NOTE(review): see the matching note on the compilation timeout
        # handler above - this version check may be unnecessary.
        if sys.version_info >= (3, 7, 0):
            process.kill()
        else:
            process.terminate()
        process.communicate(timeout=30)  # Wait for shutdown to complete.
        error_data["timeout_seconds"] = timeout_seconds
        return BenchmarkExecutionResult(
            walltime_seconds=timeout_seconds,
            error=ValidationError(
                type="Execution timeout",
                data=error_data,
            ),
        )
    finally:
        # Remove the compiled binary whether the run completed or timed out.
        if sanitizer:
            binary.unlink()

    # The benchmark may write arbitrary bytes; fall back to a placeholder when
    # the output is not valid UTF-8.
    try:
        output = stdout.decode("utf-8")
    except UnicodeDecodeError:
        output = "<binary>"

    if process.returncode:
        # Runtime error.
        # Classify the failure by looking for known markers in the output,
        # checking the sanitizer-specific ones first.
        if sanitizer == LlvmSanitizer.ASAN and "LeakSanitizer" in output:
            error_type = "Memory leak"
        elif sanitizer == LlvmSanitizer.ASAN and "AddressSanitizer" in output:
            error_type = "Memory error"
        elif sanitizer == LlvmSanitizer.MSAN and "MemorySanitizer" in output:
            error_type = "Memory error"
        elif "Segmentation fault" in output:
            error_type = "Segmentation fault"
        elif "Illegal Instruction" in output:
            error_type = "Illegal Instruction"
        else:
            error_type = f"Runtime error ({process.returncode})"

        error_data["return_code"] = process.returncode
        error_data["output"] = output
        return BenchmarkExecutionResult(
            walltime_seconds=timer.time,
            error=ValidationError(
                type=error_type,
                data=error_data,
            ),
        )
    return BenchmarkExecutionResult(walltime_seconds=timer.time, output=output)