Exemple #1
0
 def compile_all(self):
     n = self.size
     executor = thread_pool.get_thread_pool_executor()
     # Since the dataset is lazily compiled, simply iterating over the full
     # set of URIs will compile everything. Do this in parallel.
     futures = (executor.submit(self.benchmark, uri)
                for uri in self.benchmark_uris())
     for i, future in enumerate(as_completed(futures), start=1):
         future.result()
         print(
             f"\r\033[KCompiled {i} of {n} programs ({i/n:.1%} complete)",
             flush=True,
             end="",
         )
    def ivalidate(self, env: "CompilerEnv") -> Iterable[ValidationError]:  # noqa: F821
        """Run the validation callbacks and return a generator of errors.

        This is an asynchronous version of :meth:`validate()
        <compiler_gym.datasets.Benchmark.validate>` that returns immediately.

        :parameter env: A :class:`CompilerEnv <compiler_gym.envs.CompilerEnv>`
            instance to validate.

        :return: A generator of :class:`ValidationError
            <compiler_gym.ValidationError>` tuples that occur during validation.
        """
        executor = thread_pool.get_thread_pool_executor()
        futures = (
            executor.submit(validator, env) for validator in self.validation_callbacks()
        )
        for future in as_completed(futures):
            result: Iterable[ValidationError] = future.result()
            if result:
                yield from result
def validate_states(
    make_env: Callable[[], CompilerEnv],
    states: Iterable[CompilerEnvState],
    nproc: Optional[int] = None,
    inorder: bool = False,
) -> Iterable[ValidationResult]:
    """A parallelized implementation of
    :meth:`env.validate() <compiler_gym.envs.CompilerEnv.validate>` for batched
    validation.

    :param make_env: A callback which instantiates a compiler environment.
    :param states: A sequence of compiler environment states to validate.
    :param nproc: The number of parallel worker processes to run.
    :param inorder: Whether to return results in the order they were provided,
        or in the order that they are available.
    :return: An iterator over validation results. The order of results may
        differ from the input states.
    """
    executor = thread_pool.get_thread_pool_executor()

    if nproc == 1:
        map_func = map
    elif inorder:
        map_func = executor.map
    else:
        # The validation function of benchmarks can vary wildly in computational
        # demands. Shuffle the order of states (unless explicitly asked for them
        # to be kept inorder) as crude load balancing for the case where
        # multiple states are provided for each benchmark.
        states = list(states)
        random.shuffle(states)

        def map_func(func, envs, states):
            futures = (executor.submit(func, env, state)
                       for env, state in zip(envs, states))
            return (r.result() for r in as_completed(futures))

    yield from map_func(_validate_states_worker, [make_env] * len(states),
                        states)
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create an LLVM
    bitcode benchmark that can be passed to
    :meth:`compiler_gym.envs.LlvmEnv.reset`.

    The following input types are supported:

    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | **File Suffix**                                     | **Treated as**      | **Converted using**                                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.bc`                                         | LLVM IR bitcode     | No conversion required.                                     |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.ll`                                         | LLVM IR text format | Assembled to bitcode using llvm-as.                         |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+
    | :code:`.c`, :code:`.cc`, :code:`.cpp`, :code:`.cxx` | C / C++ source      | Compiled to bitcode using clang and the given :code:`copt`. |
    +-----------------------------------------------------+---------------------+-------------------------------------------------------------+

    .. note::

        The LLVM IR format has no compatability guarantees between versions (see
        `LLVM docs
        <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_).
        You must ensure that any :code:`.bc` and :code:`.ll` files are
        compatible with the LLVM version used by CompilerGym, which can be
        reported using :func:`env.compiler_version
        <compiler_gym.envs.CompilerEnv.compiler_version>`.

    E.g. for single-source C/C++ programs, you can pass the path of the source
    file:

        >>> benchmark = make_benchmark('my_app.c')
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

        >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation
    <compiler_gym.envs.llvm.ClangInvocation>` to pass a list of arguments to
    clang:

        >>> benchmark = make_benchmark(
            ClangInvocation(['/path/to/my_app.c'], system_includes=False, timeout=10)
        )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

        >>> benchmark = make_benchmark([
            'main.c',
            'lib.cpp',
            'lib2.bc',
            'foo/input.bc'
        ])

    :param inputs: An input, or list of inputs.

    :param copt: A list of command line options to pass to clang when compiling
        source files.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_library_flags`.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.

    :return: A :code:`Benchmark` instance.

    :raises FileNotFoundError: If any input sources are not found.

    :raises TypeError: If the inputs are of unsupported types.

    :raises OSError: If a suitable compiler cannot be found.

    :raises BenchmarkInitError: If a compilation job fails.

    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
        seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []
    ll_paths: List[Path] = []

    def _add_path(path: Path):
        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path.absolute())
        elif path.suffix in {".c", ".cc", ".cpp", ".cxx"}:
            clang_jobs.append(
                ClangInvocation.from_c_file(
                    path, copt=copt, system_includes=system_includes, timeout=timeout
                )
            )
        elif path.suffix == ".ll":
            ll_paths.append(path)
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes.
    if isinstance(inputs, str) or isinstance(inputs, Path):
        _add_path(Path(inputs))
    elif isinstance(inputs, ClangInvocation):
        clang_jobs.append(inputs)
    else:
        for input in inputs:
            if isinstance(input, str) or isinstance(input, Path):
                _add_path(Path(input))
            elif isinstance(input, ClangInvocation):
                clang_jobs.append(input)
            else:
                raise TypeError(f"Invalid input type: {type(input).__name__}")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs and not ll_paths:
        bitcode = bitcodes[0]
        return Benchmark.from_file(uri=f"benchmark://file-v0{bitcode}", path=bitcode)

    tmpdir_root = transient_cache_path(".")
    tmpdir_root.mkdir(exist_ok=True, parents=True)
    with tempfile.TemporaryDirectory(
        dir=tmpdir_root, prefix="llvm-make_benchmark-"
    ) as d:
        working_dir = Path(d)

        clang_outs = [
            working_dir / f"clang-out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        llvm_as_outs = [
            working_dir / f"llvm-as-out-{i}.bc" for i in range(1, len(ll_paths) + 1)
        ]

        # Run the clang and llvm-as invocations in parallel. Avoid running this
        # code path if possible as get_thread_pool_executor() requires locking.
        if clang_jobs or ll_paths:
            llvm_as_path = str(llvm.llvm_as_path())
            executor = get_thread_pool_executor()

            llvm_as_commands = [
                [llvm_as_path, str(ll_path), "-o", bc_path]
                for ll_path, bc_path in zip(ll_paths, llvm_as_outs)
            ]

            # Fire off the clang and llvm-as jobs.
            futures = [
                executor.submit(run_command, job.command(out), job.timeout)
                for job, out in zip(clang_jobs, clang_outs)
            ] + [
                executor.submit(run_command, command, timeout)
                for command in llvm_as_commands
            ]

            # Block until finished.
            list(future.result() for future in as_completed(futures))

            # Check that the expected files were generated.
            for clang_job, bc_path in zip(clang_jobs, clang_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(
                        f"clang failed: {' '.join(clang_job.command(bc_path))}"
                    )
            for command, bc_path in zip(llvm_as_commands, llvm_as_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(f"llvm-as failed: {command}")

        all_outs = bitcodes + clang_outs + llvm_as_outs
        if not all_outs:
            raise ValueError("No inputs")
        elif len(all_outs) == 1:
            # We only have a single bitcode so read it.
            with open(str(all_outs[0]), "rb") as f:
                bitcode = f.read()
        else:
            # Link all of the bitcodes into a single module.
            llvm_link_cmd = [str(llvm.llvm_link_path()), "-o", "-"] + [
                str(path) for path in bitcodes + clang_outs
            ]
            with Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            ) as llvm_link:
                bitcode, stderr = llvm_link.communicate(timeout=timeout)
                if llvm_link.returncode:
                    raise BenchmarkInitError(
                        f"Failed to link LLVM bitcodes with error: {stderr.decode('utf-8')}"
                    )

    timestamp = datetime.now().strftime("%Y%m%HT%H%M%S")
    uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri, bitcode)