コード例 #1
0
ファイル: llvm_stress.py プロジェクト: kokizzu/CompilerGym
    def benchmark_from_seed(self, seed: int) -> Benchmark:
        """Generate the benchmark that corresponds to a uint32 seed.

        The seed is handed to ``llvm-stress`` and that tool's output is piped
        into ``llvm-as``; whatever ``llvm-as`` writes to its stdout becomes the
        contents of the returned benchmark.

        :param seed: A number in the range 0 <= n < 2^32.

        :return: A benchmark instance.

        :raises BenchmarkInitError: If either tool exits with a non-zero status,
            or if waiting on the tools times out.
        """
        self.install()

        try:
            # The stdout of the first process is wired directly to the stdin of
            # the second, so no intermediate file is needed.
            with Popen(
                [str(llvm.llvm_stress_path()), f"--seed={seed}"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ) as generator, Popen(
                [str(llvm.llvm_as_path()), "-"],
                stdin=generator.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ) as assembler:
                bitcode, _ = assembler.communicate(timeout=60)
                generator.communicate(timeout=60)
                exit_codes = (generator.returncode, assembler.returncode)
                if any(exit_codes):
                    raise BenchmarkInitError("Failed to generate benchmark")
        except subprocess.TimeoutExpired:
            raise BenchmarkInitError("Benchmark generation timed out")

        return Benchmark.from_file_contents(f"{self.name}/{seed}", bitcode)
コード例 #2
0
    def compile(self, env, timeout: int = 60) -> None:
        """Finish compiling and linking the executable named by the original
        command line.

        The environment's bitcode is written to a temporary ``.bc`` file, every
        ``$IN`` placeholder in the stored build command is replaced with that
        file's path, and the resulting command is executed.

        :param env: The environment whose bitcode is written out via
            ``env.write_bitcode()``.

        :param timeout: The timeout, in seconds, passed to ``communicate()``
            when waiting for the build command.

        :raises BenchmarkInitError: If the build command exits with a non-zero
            status.
        """
        with tempfile.NamedTemporaryFile(
            dir=transient_cache_path("."), prefix="benchmark-", suffix=".bc"
        ) as bitcode_file:
            env.write_bitcode(bitcode_file.name)

            # Substitute the input-path placeholder with the real bitcode path.
            build_cmd = [
                bitcode_file.name if arg == "$IN" else arg
                for arg in self.proto.dynamic_config.build_cmd.argument
            ]

            logger.debug(f"$ {join_cmd(build_cmd)}")

            # stderr is folded into stdout so that a single stream carries the
            # diagnostics reported on failure.
            with Popen(
                build_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            ) as build:
                output, _ = build.communicate(timeout=timeout)

            if not build.returncode:
                return

            raise BenchmarkInitError(
                f"Failed to lower LLVM bitcode with error:\n"
                f"{output.decode('utf-8').rstrip()}\n"
                f"Running command: {join_cmd(build_cmd)}"
            )
コード例 #3
0
ファイル: env_tests.py プロジェクト: kokizzu/CompilerGym
 def run(cmd):
     """Run ``cmd`` and capture its result.

     Both output streams are captured as text, and the wait is limited to ten
     seconds.

     :return: A ``(returncode, stdout, stderr)`` tuple.
     """
     pipes = {"stdout": subprocess.PIPE, "stderr": subprocess.PIPE}
     with Popen(cmd, universal_newlines=True, **pipes) as proc:
         out, err = proc.communicate(timeout=10)
         exit_code = proc.returncode
     return exit_code, out, err
コード例 #4
0
    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
        """Resolve a parsed URI to a benchmark, compiling its kernel on demand.

        The URI path is joined onto ``self.dataset_root`` to form a path stem.
        If ``<stem>.bc`` already exists it is used directly; otherwise
        ``<stem>.cl`` is compiled to bitcode with clang first.

        :param uri: The parsed benchmark URI.

        :return: A benchmark created from the bitcode file and its OpenCL
            source.

        :raises LookupError: If the URI names no benchmark, or if neither the
            bitcode nor the OpenCL source file exists.

        :raises BenchmarkInitError: If compilation times out or clang exits
            with a non-zero status.
        """
        self.install()

        if not uri.path[1:]:
            raise LookupError(f"No benchmark specified: {uri}")

        # The absolute path of the file, without an extension.
        path_stem = os.path.normpath(f"{self.dataset_root}/{uri.path}")
        bc_path = Path(f"{path_stem}.bc")
        cl_path = Path(f"{path_stem}.cl")

        # Nothing to build when the bitcode is already on disk.
        if bc_path.is_file():
            return BenchmarkWithSource.create(uri, bc_path, "kernel.cl", cl_path)

        if not cl_path.is_file():
            raise LookupError(
                f"Benchmark not found: {uri} (file not found: {cl_path}, path_stem {path_stem})"
            )

        clang_options = [
            "-isystem",
            str(self.libclc_dir),
            "-include",
            str(self.opencl_h_path),
            "-target",
            "nvptx64-nvidia-nvcl",
            "-ferror-limit=1",  # Stop on first error.
            "-w",  # No warnings.
        ]

        # Compile the OpenCL kernel into a bitcode file.
        with atomic_file_write(bc_path) as tmp_bc_path:
            compile_command: List[str] = ClangInvocation.from_c_file(
                cl_path, copt=clang_options
            ).command(outpath=tmp_bc_path)
            logger.debug("Exec %s", compile_command)
            try:
                with Popen(
                    compile_command,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                ) as clang:
                    _, stderr = communicate(clang, timeout=300)
            except subprocess.TimeoutExpired:
                raise BenchmarkInitError(f"Benchmark compilation timed out: {uri}")

        # NOTE(review): the exit status is only inspected after the
        # atomic_file_write() context has exited. Whether a failed compile can
        # leave a file behind at bc_path depends on that helper - confirm.
        if clang.returncode:
            joined_command = " ".join(compile_command)
            error = truncate(
                stderr.decode("utf-8"), max_lines=20, max_line_len=20000
            )
            raise BenchmarkInitError(
                f"Compilation job failed!\n"
                f"Command: {joined_command}\n"
                f"Error: {error}"
            )

        return BenchmarkWithSource.create(uri, bc_path, "kernel.cl", cl_path)
コード例 #5
0
def get_compiler_includes(compiler: str) -> Iterable[Path]:
    """Yield the header search directories reported by a system compiler.

    The compiler is run in verbose mode on an empty C++ input read from stdin,
    and the search list it prints to stderr is parsed. This is a generator, so
    the compiler is not invoked until iteration starts.

    :param compiler: The compiler executable to invoke.

    :return: An iterator over include directories. A ``machine`` subdirectory
        is yielded right after its parent whenever it exists.

    :raises OSError: If the compiler cannot be started, exits with a non-zero
        status, or prints no complete search list.
    """
    # The compiled 'binary' goes into a throwaway directory, since GNU
    # assembler does not support piping to stdout.
    with tempfile.TemporaryDirectory() as scratch_dir:
        command = [
            compiler,
            "-xc++",
            "-v",
            "-c",
            "-",
            "-o",
            str(Path(scratch_dir) / "a.out"),
        ]
        try:
            with Popen(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
                universal_newlines=True,
            ) as process:
                _, stderr = process.communicate(input="", timeout=30)
                if process.returncode:
                    raise OSError(
                        f"Failed to invoke {compiler}. "
                        f"Is there a working system compiler?\n"
                        f"Error: {stderr.strip()}"
                    )
        except FileNotFoundError as e:
            raise OSError(
                f"Failed to invoke {compiler}. "
                f"Is there a working system compiler?\n"
                f"Error: {e}"
            ) from e

    # The output is expected to follow the conventional format used by clang
    # and GCC:
    #
    #     #include <...> search starts here:
    #     /path/1
    #     /path/2
    #     End of search list
    remaining = iter(stderr.split("\n"))

    # Skip ahead to the line that opens the search list. If it is never found
    # the iterator is exhausted and the loop below does not run.
    for text in remaining:
        if text.startswith("#include <...> search starts here:"):
            break

    terminated = False
    for text in remaining:
        if text.startswith("End of search list"):
            terminated = True
            break
        include_dir = Path(text.strip())
        yield include_dir
        # Compatibility fix for compiling benchmark sources which use the
        # '#include <endian.h>' header, which on macOS is located in a
        # 'machine/endian.h' directory.
        machine_dir = include_dir / "machine"
        if machine_dir.is_dir():
            yield machine_dir

    if not terminated:
        msg = f"Failed to parse '#include <...>' search paths from {compiler}"
        details = stderr.strip()
        if details:
            msg += f":\n{details}"
        raise OSError(msg)
コード例 #6
0
def test_force_working_dir(bin: Path, tmpdir):
    """Test that expected files are generated in the working directory.

    The binary is launched with ``--working_dir`` pointing at a subdirectory
    of ``tmpdir``, then polled for up to ten half-second intervals until both
    ``pid.txt`` and ``port.txt`` appear there.
    """
    tmpdir = Path(tmpdir) / "subdir"
    expected_files = (tmpdir / "pid.txt", tmpdir / "port.txt")
    with Popen([str(bin), "--working_dir", str(tmpdir)]):
        for _ in range(10):
            sleep(0.5)
            if all(path.is_file() for path in expected_files):
                break
        else:
            # The subdirectory may never have been created. Listing a missing
            # directory raises FileNotFoundError, which would hide the real
            # failure message, so only list it when it exists.
            if tmpdir.is_dir():
                contents = list(tmpdir.iterdir())
            else:
                contents = "<directory does not exist>"
            pytest.fail(f"PID file not found in {tmpdir}: {contents}")
コード例 #7
0
def test_fuzz(env: LlvmEnv, tmpwd: Path, llvm_opt: Path, llvm_diff: Path):
    """Check that a random trajectory can be reproduced with the opt command
    line that the environment reports.

    A random trajectory is applied to the environment, the command line from
    ``env.commandline()`` is run through the shell, and the two resulting IR
    files are compared with llvm-diff.
    """
    # The fixture is requested but its value is unused.
    # NOTE(review): presumably it switches into a temporary working directory,
    # given the relative paths used below - confirm.
    del tmpwd

    input_ir, env_ir, opt_output = Path("input.ll"), Path("env.ll"), Path("output.ll")

    env.reset()
    env.write_ir("input.ll")
    assert input_ir.is_file()

    # To turn a failure into a regression test, copy the body of this function
    # and replace the call below with the command line printed afterwards.
    apply_random_trajectory(
        env,
        random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE,
        timeout=30,
    )
    commandline = env.commandline(textformat=True)
    print(env.state)  # For debugging in case of failure.

    # Save the state reached by the environment after the trajectory.
    env.write_ir("env.ll")
    assert env_ir.is_file()

    # Replay the same command line with LLVM opt. PATH is limited to the
    # directory that holds the opt binary.
    subprocess.check_call(
        commandline,
        env={"PATH": str(llvm_opt.parent)},
        shell=True,
        timeout=60,
    )
    assert opt_output.is_file()
    os.rename("output.ll", "opt.ll")

    with Popen(
        [llvm_diff, "opt.ll", "env.ll"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    ) as differ:
        stdout, stderr = differ.communicate(timeout=300)
        if differ.returncode:
            pytest.fail(
                f"Opt produced different output to CompilerGym "
                f"(returncode: {differ.returncode}):\n{stdout}\n{stderr}"
            )
コード例 #8
0
def test_force_port(bin: Path, tmpdir):
    """Test that a forced --port value is respected.

    The binary is started on a port that was free beforehand. Once its
    ``pid.txt`` and ``port.txt`` files appear, the recorded port must match
    the requested one and must no longer be free.
    """
    port = unsafe_select_unused_port()
    assert port_is_free(port)  # Sanity check

    tmpdir = Path(tmpdir)
    pid_file = tmpdir / "pid.txt"
    port_file = tmpdir / "port.txt"

    with Popen([str(bin), "--port", str(port), "--working_dir", str(tmpdir)]):
        # Poll for up to ten half-second intervals.
        for _ in range(10):
            sleep(0.5)
            if not (pid_file.is_file() and port_file.is_file()):
                continue
            break
        else:
            pytest.fail(
                f"PID file not found in {tmpdir}: {list(tmpdir.iterdir())}")

        with open(port_file) as f:
            actual_port = int(f.read())

        assert actual_port == port
        assert not port_is_free(actual_port)
コード例 #9
0
    def make_benchmark_from_command_line(
        self,
        cmd: Union[str, List[str]],
        replace_driver: bool = True,
        system_includes: bool = True,
        timeout: int = 600,
    ) -> Benchmark:
        """Create a benchmark for use with this environment.

        This function takes a command line compiler invocation as input,
        modifies it to produce an unoptimized LLVM-IR bitcode, and then runs the
        modified command line to produce a bitcode benchmark.

        For example, the command line:

            >>> benchmark = env.make_benchmark_from_command_line(
            ...     ["gcc", "-DNDEBUG", "a.c", "b.c", "-o", "foo", "-lm"]
            ... )

        Will compile a.c and b.c to an unoptimized benchmark that can be then
        passed to :meth:`reset() <compiler_gym.envs.CompilerEnv.reset>`.

        The way this works is to change the first argument of the command line
        invocation to the version of clang shipped with CompilerGym, and to then
        append command line flags that causes the compiler to produce LLVM-IR
        with optimizations disabled. For example the input command line:

        .. code-block::

            gcc -DNDEBUG a.c b.c -o foo -lm

        Will be rewritten to be roughly equivalent to:

        .. code-block::

            /path/to/compiler_gym/clang -DNDEBUG a.c b.c \\
                -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns \\
                -c -emit-llvm -o -

        The generated benchmark then has a method :meth:`compile()
        <compiler_gym.envs.llvm.BenchmarkFromCommandLine.compile>` which
        completes the linking and compilation to executable. For the above
        example, this would be roughly equivalent to:

        .. code-block::

            /path/to/compiler_gym/clang environment-bitcode.bc -o foo -lm

        Each source file is compiled separately, in the order in which it first
        appears on the command line, and the resulting bitcodes are linked in
        that same order.

        :param cmd: A command line compiler invocation, either as a list of
            arguments (e.g. :code:`["clang", "in.c"]`) or as a single shell
            string (e.g. :code:`"clang in.c"`).

        :param replace_driver: Whether to replace the first argument of the
            command with the clang driver used by this environment.

        :param system_includes: Whether to include the system standard libraries
            during compilation jobs. This requires a system toolchain. See
            :func:`get_system_library_flags`.

        :param timeout: The maximum number of seconds to allow the compilation
            job to run before terminating.

        :return: A :class:`BenchmarkFromCommandLine
            <compiler_gym.envs.llvm.BenchmarkFromCommandLine>` instance.

        :raises ValueError: If no command line is provided, if it is too short,
            if it reads from stdin, or if it names no source files.

        :raises BenchmarkInitError: If executing the command line fails.

        :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
            seconds.
        """
        if not cmd:
            raise ValueError("Input command line is empty")

        # Split the command line if passed a single shell string.
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)

        if len(cmd) < 2:
            raise ValueError(
                f"Input command line '{join_cmd(cmd)}' is too short")

        rewritten_cmd: List[str] = list(cmd)

        # Append include flags for the system headers if requested.
        if system_includes:
            rewritten_cmd += get_system_library_flags()

        # Use the CompilerGym clang binary in place of the original driver.
        if replace_driver:
            rewritten_cmd[0] = str(clang_path())

        # Strip the -S flag, if present, as that changes the output format.
        rewritten_cmd = [c for c in rewritten_cmd if c != "-S"]

        # The invocation is captured before the output specifiers are removed
        # below, so it still carries the original "-o <path>".
        invocation = GccInvocation(rewritten_cmd)

        # Strip the output specifier(s). This is not strictly required since we
        # override it later, but makes the generated command easier to
        # understand. Walking backwards keeps the remaining indices valid while
        # entries are deleted.
        for i in range(len(rewritten_cmd) - 2, -1, -1):
            if rewritten_cmd[i] == "-o":
                del rewritten_cmd[i + 1]
                del rewritten_cmd[i]

        # Fail early.
        if "-" in invocation.sources:
            raise ValueError("Input command line reads from stdin, "
                             f"which is not supported: '{join_cmd(cmd)}'")

        # Convert all of the C/C++ sources to bitcodes which can then be linked
        # into a single bitcode. We must process them individually because the
        # '-c' flag does not support multiple sources when we are specifying the
        # output path using '-o'.
        #
        # Duplicates are dropped while keeping first-seen order. A plain set
        # would make the compile and link order depend on string hashing, which
        # varies between interpreter runs.
        sources: List[str] = list(
            dict.fromkeys(s for s in invocation.sources if not s.endswith(".o"))
        )
        if not sources:
            raise ValueError(
                f"Input command line has no source file inputs: '{join_cmd(cmd)}'"
            )
        source_set = set(sources)

        bitcodes: List[bytes] = []
        for source in sources:
            # Adapt the command line so that it compiles only this source,
            # dropping the names of all other sources. With a single source
            # this filter keeps every argument.
            emit_bitcode_command = [
                c for c in rewritten_cmd if c == source or c not in source_set
            ]

            # Append the flags to emit the bitcode and disable the optimization
            # passes.
            emit_bitcode_command += [
                "-c",
                "-emit-llvm",
                "-o",
                "-",
                "-Xclang",
                "-disable-llvm-passes",
                "-Xclang",
                "-disable-llvm-optzns",
            ]

            with Popen(emit_bitcode_command,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE) as clang:
                logger.debug(
                    f"Generating LLVM bitcode benchmark: {join_cmd(emit_bitcode_command)}"
                )
                bitcode, stderr = clang.communicate(timeout=timeout)
                if clang.returncode:
                    raise BenchmarkInitError(
                        f"Failed to generate LLVM bitcode with error:\n"
                        f"{stderr.decode('utf-8').rstrip()}\n"
                        f"Running command: {join_cmd(emit_bitcode_command)}\n"
                        f"From original commandline: {join_cmd(cmd)}")
                bitcodes.append(bitcode)

        # With a single source the bitcode from the only compile job is used
        # as-is. Multiple bitcodes are linked together.
        bitcode = bitcodes[0]
        if len(bitcodes) > 1:
            with TemporaryDirectory(dir=transient_cache_path("."),
                                    prefix="llvm-benchmark-") as tmpdir:
                # Write the bitcodes to files.
                bitcode_paths: List[str] = []
                for i, contents in enumerate(bitcodes):
                    path = os.path.join(tmpdir, f"{i}.bc")
                    with open(path, "wb") as f:
                        f.write(contents)
                    bitcode_paths.append(path)

                # Link the bitcode files.
                llvm_link_cmd = [str(llvm_link_path()), "-o", "-"] + bitcode_paths
                with Popen(llvm_link_cmd,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE) as llvm_link:
                    bitcode, stderr = llvm_link.communicate(timeout=timeout)
                    if llvm_link.returncode:
                        raise BenchmarkInitError(
                            f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                        )

        return BenchmarkFromCommandLine(invocation, bitcode, timeout)
コード例 #10
0
ファイル: llvm_benchmark.py プロジェクト: kokizzu/CompilerGym
def _get_system_library_flags(compiler: str) -> Iterable[str]:
    """Yield compiler flags that point at the host's system header directories.

    The compiler is run in verbose mode on a trivial program read from stdin.
    Each directory in the search list it prints to stderr is emitted as an
    ``-isystem <dir>`` pair of flags. On macOS a fixed ``-L`` library path is
    emitted at the end.

    :param compiler: The compiler executable to invoke.

    :raises HostCompilerFailure: If the compiler cannot be started, exits with
        a non-zero status, or times out on all three attempts.

    :raises UnableToParseHostCompilerOutput: If the output contains no
        complete search list.
    """
    # The compiled binary is written to a temporary file, since GNU assembler
    # does not support piping to stdout.
    cache_dir = transient_cache_path(".")
    cache_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(dir=cache_dir) as outfile:
        cmd = [compiler, "-xc++", "-v", "-", "-o", outfile.name]
        # On macOS we need to compile a binary to invoke the linker, so the
        # compile-only flag is added on every other platform.
        if sys.platform != "darwin":
            cmd.append("-c")

        # Timeouts are retried, since they have been seen on heavily loaded
        # systems such as CI machines.
        for _attempt in range(3):
            try:
                with Popen(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE,
                    universal_newlines=True,
                ) as process:
                    _, stderr = communicate(
                        process=process, input="int main(){return 0;}", timeout=30
                    )
                    if process.returncode:
                        raise HostCompilerFailure(
                            f"Failed to invoke '{compiler}'. "
                            f"Is there a working system compiler?\n"
                            f"Error: {stderr.strip()}"
                        )
            except subprocess.TimeoutExpired:
                continue
            except FileNotFoundError as e:
                raise HostCompilerFailure(
                    f"Failed to invoke '{compiler}'. "
                    f"Is there a working system compiler?\n"
                    f"Error: {e}"
                ) from e
            else:
                break
        else:
            raise HostCompilerFailure(
                f"Compiler invocation '{join_cmd(cmd)}' timed out after 3 attempts."
            )

    # The output is expected to follow the conventional format used by clang
    # and GCC:
    #
    #     #include <...> search starts here:
    #     /path/1
    #     /path/2
    #     End of search list
    remaining = iter(stderr.split("\n"))

    # Skip ahead to the line that opens the search list. If it is never found
    # the iterator is exhausted and the loop below does not run.
    for text in remaining:
        if text.startswith("#include <...> search starts here:"):
            break

    terminated = False
    for text in remaining:
        if text.startswith("End of search list"):
            terminated = True
            break
        include_dir = Path(text.strip())
        yield "-isystem"
        yield str(include_dir)
        # Compatibility fix for compiling benchmark sources which use the
        # '#include <endian.h>' header, which on macOS is located in a
        # 'machine/endian.h' directory.
        machine_dir = include_dir / "machine"
        if machine_dir.is_dir():
            yield "-isystem"
            yield str(machine_dir)

    if not terminated:
        msg = f"Failed to parse '#include <...>' search paths from '{compiler}'"
        details = stderr.strip()
        if details:
            msg += f":\n{details}"
        raise UnableToParseHostCompilerOutput(msg)

    if sys.platform == "darwin":
        yield "-L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
コード例 #11
0
ファイル: llvm_benchmark.py プロジェクト: kokizzu/CompilerGym
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create an LLVM
    bitcode benchmark that can be passed to
    :meth:`compiler_gym.envs.LlvmEnv.reset`.

    The following input types are supported:

    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | **File Suffix**                                     | **Treated as**      | **Converted using**                                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.bc`                                         | LLVM IR bitcode     | No conversion required.                                     |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.ll`                                         | LLVM IR text format | Assembled to bitcode using llvm-as.                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.c`, :code:`.cc`, :code:`.cpp`, :code:`.cxx` | C / C++ source      | Compiled to bitcode using clang and the given :code:`copt`. |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+

    .. note::

        The LLVM IR format has no compatibility guarantees between versions (see
        `LLVM docs
        <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_).
        You must ensure that any :code:`.bc` and :code:`.ll` files are
        compatible with the LLVM version used by CompilerGym, which can be
        reported using :func:`env.compiler_version
        <compiler_gym.envs.CompilerEnv.compiler_version>`.

    E.g. for single-source C/C++ programs, you can pass the path of the source
    file:

        >>> benchmark = make_benchmark('my_app.c')
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

        >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation
    <compiler_gym.envs.llvm.ClangInvocation>` to pass a list of arguments to
    clang:

        >>> benchmark = make_benchmark(
            ClangInvocation(['/path/to/my_app.c'], system_includes=False, timeout=10)
        )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

        >>> benchmark = make_benchmark([
            'main.c',
            'lib.cpp',
            'lib2.bc',
            'foo/input.bc'
        ])

    :param inputs: An input, or list of inputs.

    :param copt: A list of command line options to pass to clang when compiling
        source files.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_library_flags`.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.

    :return: A :code:`Benchmark` instance.

    :raises FileNotFoundError: If any input sources are not found.

    :raises TypeError: If the inputs are of unsupported types.

    :raises ValueError: If an input file has an unrecognized suffix, or if no
        inputs are given.

    :raises OSError: If a suitable compiler cannot be found.

    :raises BenchmarkInitError: If a compilation job fails.

    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
        seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []
    ll_paths: List[Path] = []

    def _add_path(path: Path) -> None:
        """Sort a file into the right work list based on its suffix."""
        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path.absolute())
        elif path.suffix in {".c", ".cc", ".cpp", ".cxx"}:
            clang_jobs.append(
                ClangInvocation.from_c_file(
                    path, copt=copt, system_includes=system_includes, timeout=timeout
                )
            )
        elif path.suffix == ".ll":
            ll_paths.append(path)
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes. A lone input is treated as
    # a one-element list.
    if isinstance(inputs, (str, Path, ClangInvocation)):
        inputs = [inputs]
    for item in inputs:
        if isinstance(item, (str, Path)):
            _add_path(Path(item))
        elif isinstance(item, ClangInvocation):
            clang_jobs.append(item)
        else:
            raise TypeError(f"Invalid input type: {type(item).__name__}")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs and not ll_paths:
        bitcode = bitcodes[0]
        return Benchmark.from_file(uri=f"benchmark://file-v0{bitcode}", path=bitcode)

    tmpdir_root = transient_cache_path(".")
    tmpdir_root.mkdir(exist_ok=True, parents=True)
    with tempfile.TemporaryDirectory(
        dir=tmpdir_root, prefix="llvm-make_benchmark-"
    ) as d:
        working_dir = Path(d)

        clang_outs = [
            working_dir / f"clang-out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        llvm_as_outs = [
            working_dir / f"llvm-as-out-{i}.bc" for i in range(1, len(ll_paths) + 1)
        ]

        # Run the clang and llvm-as invocations in parallel. Avoid running this
        # code path if possible as get_thread_pool_executor() requires locking.
        if clang_jobs or ll_paths:
            llvm_as_path = str(llvm.llvm_as_path())
            executor = get_thread_pool_executor()

            llvm_as_commands = [
                [llvm_as_path, str(ll_path), "-o", str(bc_path)]
                for ll_path, bc_path in zip(ll_paths, llvm_as_outs)
            ]

            # Fire off the clang and llvm-as jobs.
            futures = [
                executor.submit(run_command, job.command(out), job.timeout)
                for job, out in zip(clang_jobs, clang_outs)
            ] + [
                executor.submit(run_command, command, timeout)
                for command in llvm_as_commands
            ]

            # Block until finished. Calling result() re-raises any exception
            # that occurred inside a job.
            for future in as_completed(futures):
                future.result()

            # Check that the expected files were generated.
            for clang_job, bc_path in zip(clang_jobs, clang_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(
                        f"clang failed: {' '.join(clang_job.command(bc_path))}"
                    )
            for command, bc_path in zip(llvm_as_commands, llvm_as_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(f"llvm-as failed: {command}")

        all_outs = bitcodes + clang_outs + llvm_as_outs
        if not all_outs:
            raise ValueError("No inputs")
        elif len(all_outs) == 1:
            # We only have a single bitcode so read it.
            with open(str(all_outs[0]), "rb") as f:
                bitcode = f.read()
        else:
            # Link all of the bitcodes into a single module. This must include
            # the llvm-as outputs, otherwise .ll inputs would be silently left
            # out of the linked benchmark.
            llvm_link_cmd = [str(llvm.llvm_link_path()), "-o", "-"] + [
                str(path) for path in all_outs
            ]
            with Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            ) as llvm_link:
                bitcode, stderr = llvm_link.communicate(timeout=timeout)
                if llvm_link.returncode:
                    raise BenchmarkInitError(
                        f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                    )

    # Name the benchmark with a date-time stamp (year, month, day, then
    # hour, minute, second) plus four random hex digits.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri, bitcode)
コード例 #12
0
def _compile_and_run_bitcode_file(
    bitcode_file: Path,
    cmd: str,
    cwd: Path,
    linkopts: List[str],
    env: Dict[str, str],
    num_runs: int,
    sanitizer: Optional[LlvmSanitizer] = None,
    timeout_seconds: float = 300,
    compilation_timeout_seconds: float = 60,
) -> BenchmarkExecutionResult:
    """Execute a cBench benchmark and report its output or its failure.

    When a sanitizer is given, the bitcode is first compiled with clang into
    ``a.out`` inside ``cwd`` and ``$BIN`` in the command is replaced with
    ``./a.out``. Otherwise ``$BIN`` is replaced with an ``lli benchmark.bc``
    invocation and nothing is compiled.

    :param bitcode_file: The bitcode handed to clang when a sanitizer is used.

    :param cmd: The shell command to run, containing a ``$BIN`` placeholder.

    :param cwd: The directory in which ``_finfo_dataset`` is written, the
        binary is built, and the command is executed.

    :param linkopts: Extra arguments appended to the clang command line.

    :param env: Environment variables layered on top of the minimal run
        environment.

    :param num_runs: The iteration count written to ``_finfo_dataset``.

    :param sanitizer: The sanitizer to build with, if any.

    :param timeout_seconds: The maximum number of seconds the benchmark may
        run. This value is also reported as the walltime of any result that
        describes a compilation failure or a timeout.

    :param compilation_timeout_seconds: The maximum number of seconds clang
        may run.

    :return: A result carrying the decoded output when the command exits
        with status zero, otherwise a result carrying a validation error.
    """
    # cBench programs read their iteration count from a file called
    # _finfo_dataset in the current working directory.
    with open(cwd / "_finfo_dataset", "w") as finfo:
        finfo.write(f"{num_runs}\n")

    # Start from a barebones environment rather than inheriting everything.
    run_env = {
        variable: os.environ.get(variable, "")
        for variable in ("TMPDIR", "HOME", "USER")
    }
    # Silence GRPC: its messages on stderr have previously been mistaken for
    # benchmark output, producing false-positive "Wrong output" errors.
    run_env["GRPC_VERBOSITY"] = "NONE"
    run_env.update(env)

    error_data = {}

    if sanitizer:
        clang_path = llvm.clang_path()
        binary = cwd / "a.out"
        error_data["run_cmd"] = cmd.replace("$BIN", "./a.out")

        # Build the a.out binary that the run command will execute.
        base_cmd = [clang_path.name, str(bitcode_file), "-o", str(binary)]
        sanitizer_flags = _SANITIZER_FLAGS.get(sanitizer, [])
        compile_cmd = base_cmd + _COMPILE_ARGS + list(linkopts) + sanitizer_flags
        error_data["compile_cmd"] = compile_cmd
        logger.debug("compile: %s", compile_cmd)
        assert not binary.is_file()

        # clang is invoked by name, so its directory must come first on PATH.
        clang_env = {"PATH": f"{clang_path.parent}:{os.environ.get('PATH', '')}"}
        try:
            with Popen(
                compile_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                env=clang_env,
            ) as clang:
                output, _ = clang.communicate(timeout=compilation_timeout_seconds)
                if clang.returncode:
                    error_data["output"] = output
                    compile_error = ValidationError(
                        type="Compilation failed",
                        data=error_data,
                    )
                    return BenchmarkExecutionResult(
                        walltime_seconds=timeout_seconds,
                        error=compile_error,
                    )
        except subprocess.TimeoutExpired:
            error_data["timeout"] = compilation_timeout_seconds
            compile_timeout = ValidationError(
                type="Compilation timeout",
                data=error_data,
            )
            return BenchmarkExecutionResult(
                walltime_seconds=timeout_seconds,
                error=compile_timeout,
            )
        assert binary.is_file()
    else:
        lli_path = llvm.lli_path()
        interpreter_cmd = f"{lli_path.name} benchmark.bc"
        error_data["run_cmd"] = cmd.replace("$BIN", interpreter_cmd)
        # Set after merging `env`, so this replaces any PATH the caller gave.
        run_env["PATH"] = str(lli_path.parent)

    shell_cmd = error_data["run_cmd"]
    logger.debug("exec: %s", shell_cmd)
    try:
        with Timer() as timer:
            with Popen(
                shell_cmd,
                shell=True,
                stderr=subprocess.STDOUT,
                stdout=subprocess.PIPE,
                env=run_env,
                cwd=cwd,
            ) as process:
                stdout, _ = process.communicate(timeout=timeout_seconds)
    except subprocess.TimeoutExpired:
        error_data["timeout_seconds"] = timeout_seconds
        run_timeout = ValidationError(
            type="Execution timeout",
            data=error_data,
        )
        return BenchmarkExecutionResult(
            walltime_seconds=timeout_seconds,
            error=run_timeout,
        )
    finally:
        # The compiled binary only exists on the sanitizer path.
        if sanitizer:
            binary.unlink()

    try:
        output = stdout.decode("utf-8")
    except UnicodeDecodeError:
        output = "<binary>"

    if not process.returncode:
        return BenchmarkExecutionResult(walltime_seconds=timer.time, output=output)

    # Runtime error: classify it using the first matching signature.
    built_with_asan = sanitizer == LlvmSanitizer.ASAN
    built_with_msan = sanitizer == LlvmSanitizer.MSAN
    known_failures = (
        (built_with_asan, "LeakSanitizer", "Memory leak"),
        (built_with_asan, "AddressSanitizer", "Memory error"),
        (built_with_msan, "MemorySanitizer", "Memory error"),
        (True, "Segmentation fault", "Segmentation fault"),
        (True, "Illegal Instruction", "Illegal Instruction"),
    )
    for applicable, marker, label in known_failures:
        if applicable and marker in output:
            error_type = label
            break
    else:
        error_type = f"Runtime error ({process.returncode})"

    error_data["return_code"] = process.returncode
    error_data["output"] = output
    runtime_error = ValidationError(
        type=error_type,
        data=error_data,
    )
    return BenchmarkExecutionResult(
        walltime_seconds=timer.time,
        error=runtime_error,
    )
コード例 #13
0
def compute_observation(
    observation_space: ObservationSpaceSpec, bitcode: Path, timeout: float = 300
) -> ObservationType:
    """Compute a single LLVM observation for a bitcode file.

    A standalone binary is run on the bitcode file and its output is parsed
    and translated by the observation space. This is a utility for cases
    where an observation is wanted without the overhead of initializing a
    full environment.

    Example usage:

        >>> env = compiler_gym.make("llvm-v0")
        >>> space = env.observation.spaces["Ir"]
        >>> bitcode = Path("bitcode.bc")
        >>> observation = llvm.compute_observation(space, bitcode, timeout=30)

    .. warning::

        This is not part of the core CompilerGym API and may change in a future
        release.

    :param observation_space: The observation space to compute a value for.

    :param bitcode: The path of an LLVM bitcode file.

    :param timeout: The maximum number of seconds to let the computation run
        before giving up.

    :raises ValueError: If the computation fails, or if its output cannot be
        decoded or parsed.

    :raises TimeoutError: If the computation does not finish within the
        timeout.

    :raises FileNotFoundError: If the given bitcode does not exist.
    """
    if not Path(bitcode).is_file():
        raise FileNotFoundError(bitcode)

    observation_space_name = pascal_case_to_enum(observation_space.id)
    command = [str(_COMPUTE_OBSERVATION_BIN), observation_space_name, str(bitcode)]

    try:
        with Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        ) as process:
            stdout, stderr = process.communicate(timeout=timeout)

            if process.returncode:
                # Include stderr in the message only if it is valid UTF-8.
                try:
                    error_text = stderr.decode("utf-8")
                except UnicodeDecodeError as e:
                    raise ValueError(
                        f"Failed to compute {observation_space.id} observation"
                    ) from e
                raise ValueError(
                    f"Failed to compute {observation_space.id} observation: {error_text}"
                )
    except subprocess.TimeoutExpired as e:
        unit = plural(int(round(timeout)), 'second', 'seconds')
        raise TimeoutError(
            f"Failed to compute {observation_space.id} observation in {timeout:.1f} {unit}"
        ) from e

    try:
        text = stdout.decode("utf-8")
    except UnicodeDecodeError as e:
        raise ValueError(
            f"Failed to parse {observation_space.id} observation: {e}"
        ) from e

    # The binary's output is parsed as a text-format Event message.
    observation = Event()
    try:
        google.protobuf.text_format.Parse(text, observation)
    except google.protobuf.text_format.ParseError as e:
        raise ValueError(f"Failed to parse {observation_space.id} observation") from e

    return observation_space.translate(observation)
コード例 #14
0
def test_popen():
    """Check that a process has terminated once its Popen scope is left."""
    with Popen(["echo"]) as echo_process:
        communicate(echo_process, timeout=60)
    # poll() returns None only while the process is still running.
    exit_status = echo_process.poll()
    assert exit_status is not None
コード例 #15
0
    def benchmark_from_seed(self,
                            seed: int,
                            max_retries: int = 3,
                            retry_count: int = 0) -> CsmithBenchmark:
        """Generate a benchmark from a uint32 Csmith seed.

        Csmith is run to produce a C program, which is then piped through the
        clang compile command. If Csmith exits with an error, the method calls
        itself again with an incremented retry count.

        :param seed: A number in the range 0 <= n < 2^32.

        :param max_retries: The number of Csmith attempts allowed before
            giving up.

        :param retry_count: The number of attempts already made. Callers
            normally leave this at its default.

        :return: A benchmark instance.

        :raises OSError: If Csmith fails.

        :raises BenchmarkInitError: If the C program generated by Csmith cannot
            be lowered to LLVM-IR, or if generation times out.
        """
        if retry_count >= max_retries:
            attempts = plural(retry_count, 'attempt', 'attempts')
            raise OSError(
                f"Csmith failed after {retry_count} {attempts} with seed {seed}"
            )

        self.install()

        # Run csmith with the given seed, then feed its output to clang to
        # assemble a bitcode.
        logger.debug("Exec csmith --seed %d", seed)
        csmith_cmd = [str(self.csmith_bin_path), "--seed", str(seed)]
        try:
            with Popen(
                csmith_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ) as csmith:
                # Generate the C source.
                src, stderr = communicate(csmith, timeout=300)
                if csmith.returncode:
                    try:
                        decoded = stderr.decode("utf-8")
                        report = "\n".join(
                            truncate(decoded, max_line_len=200, max_lines=20)
                        )
                        logger.warning(
                            "Csmith failed with seed %d: %s", seed, report
                        )
                    except UnicodeDecodeError:
                        # stderr is not valid UTF-8, so log a generic message.
                        logger.warning("Csmith failed with seed %d", seed)
                    next_attempt = retry_count + 1
                    return self.benchmark_from_seed(
                        seed,
                        max_retries=max_retries,
                        retry_count=next_attempt,
                    )

            # Compile to IR.
            with Popen(
                self.clang_compile_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
            ) as clang:
                stdout, _ = communicate(clang, input=src, timeout=300)
                if clang.returncode:
                    compile_cmd = " ".join(self.clang_compile_command)
                    raise BenchmarkInitError(
                        f"Compilation job failed!\n"
                        f"Csmith seed: {seed}\n"
                        f"Command: {compile_cmd}\n"
                    )
        except subprocess.TimeoutExpired:
            raise BenchmarkInitError(
                f"Benchmark generation using seed {seed} timed out"
            )

        benchmark_name = f"{self.name}/{seed}"
        return self.benchmark_class.create(benchmark_name, stdout, src)
コード例 #16
0
    def validator_cb(
            env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
        """Validate the environment's current state by running the benchmark.

        The benchmark is executed in a fresh temporary directory. When output
        comparison or output files are requested, a reference ("gold
        standard") run of the unmodified benchmark is produced first, and the
        console output and output files of the real run are compared against
        it.

        Several names used here (``input_files``, ``output_files``, ``cmd``,
        ``linkopts``, ``os_env``, ``num_runs``, ``sanitizer``,
        ``compare_output``, ``validate_result``, ``pre_execution_callback``)
        are not defined in this function; they come from an enclosing scope.

        :param env: The environment whose current bitcode is validated.

        :return: A validation error describing the first failure found, or
            ``None`` if no check fails.

        :raises FileNotFoundError: If a required input file is missing from
            the runtime data, or if the gold standard run does not produce an
            expected output file.
        """
        # Hold both a thread lock and an inter-process lock around the
        # runtime data download.
        with _CBENCH_DOWNLOAD_THREAD_LOCK:
            with fasteners.InterProcessLock(
                    cache_path(".cbench-v1-runtime-data.LOCK")):
                download_cBench_runtime_data()

        # Check up front that every input file the benchmark needs exists.
        cbench_data = site_data_path(
            "llvm-v0/cbench-v1-runtime-data/runtime_data")
        for input_file_name in input_files:
            path = cbench_data / input_file_name
            if not path.is_file():
                raise FileNotFoundError(
                    f"Required benchmark input not found: {path}")

        # Create a temporary working directory to execute the benchmark in.
        with tempfile.TemporaryDirectory(
                dir=env.service.connection.cache.path) as d:
            cwd = Path(d)

            # Expand shell variable substitutions in the benchmark command.
            expanded_command = cmd.replace("$D", str(cbench_data))

            # Translate the output file names into paths inside the working
            # directory.
            output_paths = [cwd / o for o in output_files]

            if pre_execution_callback:
                pre_execution_callback(cwd)

            # Produce a gold-standard output using a reference version of
            # the benchmark.
            if compare_output or output_files:
                # Work on a fork so that the caller's environment is not reset.
                gs_env = env.fork()
                try:
                    # Reset to the original benchmark state and compile it.
                    gs_env.reset(benchmark=env.benchmark)
                    gs_env.write_bitcode(cwd / "benchmark.bc")
                    gold_standard = _compile_and_run_bitcode_file(
                        bitcode_file=cwd / "benchmark.bc",
                        cmd=expanded_command,
                        cwd=cwd,
                        num_runs=1,
                        # Use default optimizations for gold standard.
                        linkopts=linkopts + ["-O2"],
                        # Always assume safe.
                        sanitizer=None,
                        env=os_env,
                    )
                    # A failing reference run is reported with a prefix so it
                    # can be told apart from a failure of the run under test.
                    if gold_standard.error:
                        return ValidationError(
                            type=f"Gold standard: {gold_standard.error.type}",
                            data=gold_standard.error.data,
                        )
                finally:
                    gs_env.close()

                # Check that the reference run produced the expected output
                # files.
                for path in output_paths:
                    if not path.is_file():
                        # NOTE(review): this only reads an attribute, so the
                        # UnicodeDecodeError handler looks unreachable; confirm
                        # whether `output` can decode lazily.
                        try:
                            output = gold_standard.output
                        except UnicodeDecodeError:
                            output = "<binary>"
                        raise FileNotFoundError(
                            f"Expected file '{path.name}' not generated\n"
                            f"Benchmark: {env.benchmark}\n"
                            f"Command: {cmd}\n"
                            f"Output: {output}")
                    # Move the reference output aside so that the real run
                    # below can write a file of the same name.
                    path.rename(f"{path}.gold_standard")

            # Serialize the benchmark to a bitcode file that will then be
            # compiled to a binary.
            env.write_bitcode(cwd / "benchmark.bc")
            outcome = _compile_and_run_bitcode_file(
                bitcode_file=cwd / "benchmark.bc",
                cmd=expanded_command,
                cwd=cwd,
                num_runs=num_runs,
                linkopts=linkopts,
                sanitizer=sanitizer,
                env=os_env,
            )

            if outcome.error:
                return outcome.error

            # Run a user-specified validation hook.
            # NOTE(review): the hook's return value is discarded here. If hooks
            # report failures by returning a value rather than raising, those
            # failures are lost; confirm the hook contract.
            if validate_result:
                validate_result(outcome)

            # Difftest the console output.
            if compare_output and gold_standard.output != outcome.output:
                return ValidationError(
                    type="Wrong output",
                    data={
                        "expected": gold_standard.output,
                        "actual": outcome.output
                    },
                )

            # Difftest the output files.
            for path in output_paths:
                if not path.is_file():
                    return ValidationError(
                        type="Output not generated",
                        data={
                            "path": path.name,
                            "command": cmd
                        },
                    )
                # Compare against the reference copy using the `diff` command;
                # a non-zero exit status is treated as a mismatch.
                with Popen(
                    ["diff", str(path), f"{path}.gold_standard"],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
                ) as diff:
                    stdout, _ = diff.communicate(timeout=300)
                    if diff.returncode:
                        # Report the diff text when it is valid UTF-8, and a
                        # placeholder otherwise.
                        try:
                            stdout = stdout.decode("utf-8")
                            return ValidationError(
                                type="Wrong output (file)",
                                data={
                                    "path": path.name,
                                    "diff": stdout
                                },
                            )
                        except UnicodeDecodeError:
                            return ValidationError(
                                type="Wrong output (file)",
                                data={
                                    "path": path.name,
                                    "diff": "<binary>"
                                },
                            )
コード例 #17
0
    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
        """Return the benchmark for a URI, compiling its bitcode on demand.

        The bitcode is looked up at ``<dataset_root>/<uri.path>.bc``. If that
        file is missing, the source at ``<dataset_root>/<uri.path>.txt`` is
        pre-processed and compiled with clang as C++11, and the result is
        written to the bitcode path so that later calls can reuse it.

        :param uri: The parsed URI of the benchmark.

        :return: The result of ``BenchmarkWithSource.create()`` for the
            bitcode path and the source path, the latter under the name
            ``source.cc``.

        :raises LookupError: If neither the bitcode nor the source file
            exists.

        :raises BenchmarkInitError: If compilation times out, fails, or does
            not produce an output file.
        """
        self.install()

        # The absolute path of the file, without an extension.
        path_stem = os.path.normpath(f"{self.dataset_root}/{uri.path}")

        # If the file does not exist, compile it on-demand.
        bitcode_path = Path(f"{path_stem}.bc")
        cc_file_path = Path(f"{path_stem}.txt")

        if not bitcode_path.is_file():
            if not cc_file_path.is_file():
                raise LookupError(
                    f"Benchmark not found: {uri} (file not found: {cc_file_path})"
                )

            # Load the C++ source into memory and pre-process it.
            with open(cc_file_path) as f:
                src = self.preprocess_poj104_source(f.read())

            # Compile the C++ source into a bitcode file.
            #
            # Every failure is handled INSIDE this scope, and the temporary
            # file is removed before raising. Handling failures after the
            # scope has exited is too late: by then the temporary file may
            # already have been moved to bitcode_path, where a later call
            # would find it and treat it as a valid cached bitcode.
            # NOTE(review): assumes atomic_file_write() publishes the
            # temporary file to bitcode_path when this scope exits, and
            # tolerates the temporary file having been removed; confirm
            # against its implementation.
            with atomic_file_write(bitcode_path) as tmp_bitcode_path:
                compile_cmd = ClangInvocation.from_c_file(
                    "-",
                    copt=[
                        "-xc++",
                        "-ferror-limit=1",  # Stop on first error.
                        "-w",  # No warnings.
                        # Some of the programs use the gets() function that was
                        # deprecated in C++11 and removed in C++14.
                        "-std=c++11",
                    ],
                ).command(outpath=tmp_bitcode_path)
                logger.debug("Exec %s", compile_cmd)
                compile_cmd_str = " ".join(compile_cmd)
                try:
                    with Popen(
                        compile_cmd,
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                    ) as clang:
                        _, stderr = clang.communicate(
                            input=src.encode("utf-8"), timeout=300
                        )
                except subprocess.TimeoutExpired as e:
                    # Discard whatever clang managed to write.
                    if tmp_bitcode_path.is_file():
                        tmp_bitcode_path.unlink()
                    raise BenchmarkInitError(
                        f"Benchmark compilation timed out: {uri}"
                    ) from e

                if clang.returncode:
                    # Remove the partial output first, so that nothing below
                    # can prevent the cleanup.
                    if tmp_bitcode_path.is_file():
                        tmp_bitcode_path.unlink()
                    error = truncate(
                        stderr.decode("utf-8", errors="replace"),
                        max_lines=20,
                        max_line_len=100,
                    )
                    raise BenchmarkInitError(
                        f"Compilation job failed!\n"
                        f"Command: {compile_cmd_str}\n"
                        f"Error: {error}"
                    )

            if not bitcode_path.is_file():
                raise BenchmarkInitError(
                    f"Compilation job failed to produce output file!\nCommand: {compile_cmd_str}"
                )

        return BenchmarkWithSource.create(uri, bitcode_path, "source.cc", cc_file_path)
コード例 #18
0
    def benchmark_from_seed(
        self, seed: int, max_retries: int = 3, retry_count: int = 0
    ) -> CsmithBenchmark:
        """Generate a benchmark from a uint32 Csmith seed.

        Csmith is run to produce a C program, which is then pre-processed by
        ``self.gcc``. If Csmith exits with an error, the method calls itself
        again with an incremented retry count.

        :param seed: A number in the range 0 <= n < 2^32.

        :param max_retries: The number of Csmith attempts allowed before
            giving up.

        :param retry_count: The number of attempts already made. Callers
            normally leave this at its default.

        :return: A benchmark instance.

        :raises OSError: If Csmith fails.

        :raises BenchmarkInitError: If the C program generated by Csmith cannot
            be lowered to LLVM-IR.
        """
        if retry_count >= max_retries:
            attempts = plural(retry_count, 'attempt', 'attempts')
            raise OSError(
                f"Csmith failed after {retry_count} {attempts} with seed {seed}"
            )

        self.install()

        # Run csmith with the given seed to generate the C source.
        logger.debug("Exec csmith --seed %d", seed)
        csmith_cmd = [str(self.csmith_bin_path), "--seed", str(seed)]
        with Popen(
            csmith_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as csmith:
            src, stderr = csmith.communicate(timeout=300)

            if csmith.returncode:
                try:
                    decoded = stderr.decode("utf-8")
                    report = "\n".join(
                        truncate(decoded, max_line_len=200, max_lines=20)
                    )
                    logger.warning("Csmith failed with seed %d: %s", seed, report)
                except UnicodeDecodeError:
                    # stderr is not valid UTF-8, so log a generic message.
                    logger.warning("Csmith failed with seed %d", seed)
                next_attempt = retry_count + 1
                return self.benchmark_from_seed(
                    seed, max_retries=max_retries, retry_count=next_attempt
                )

        # Pre-process the source from a scratch directory.
        with tempfile.TemporaryDirectory() as tmpdir:
            src_file = f"{tmpdir}/src.c"
            with open(src_file, "wb") as src_out:
                src_out.write(src)

            # The includes directory is both the -I search path and a
            # read-only volume bound at the same location.
            includes_dir = str(self.site_data_path / "includes")
            gcc_args = ("-E", "-I", includes_dir, "-o", "-", src_file)
            preprocessed_src = self.gcc(
                *gcc_args,
                cwd=tmpdir,
                timeout=60,
                volumes={includes_dir: {"bind": includes_dir, "mode": "ro"}},
            )

        benchmark_name = f"{self.name}/{seed}"
        return self.benchmark_class.create(
            benchmark_name, preprocessed_src.encode("utf-8"), src
        )