Beispiel #1
0
def from_xla_hlo_proto(
    hlos: Union[HloProto, Iterable[HloProto]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from an XLA HLO protocol buffer.

    Each input is serialized and piped over stdin to the :code:`XLA2GRAPH`
    binary, whose output is converted to a graph by
    :code:`_graph_from_subprocess`.

    :param hlos: A :code:`HloProto`, or an iterable sequence of :code:`HloProto`
        instances.

    :param timeout: The maximum number of seconds to wait for an individual
        graph construction invocation before raising an error. If multiple
        inputs are provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. The executor role is to dispatch
        the execution of the jobs locally/on a cluster/with multithreading
        depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`hlos` is a single input, returns a single
        :code:`programl.ProgramGraph` instance. Else returns an iterable over
        :code:`programl.ProgramGraph` instances.

    :raises GraphCreationError: If graph construction fails.

    :raises TimeoutError: If the specified timeout is reached. The child
        process is killed before the error is raised.
    """

    def _run_one(hlo: HloProto) -> ProgramGraph:
        process = subprocess.Popen(
            [XLA2GRAPH, "--stdin_fmt=pb", "--stdout_fmt=pb"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = process.communicate(
                hlo.SerializeToString(), timeout=timeout
            )
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child on timeout. Kill it
            # and drain the pipes so neither the process nor its file
            # descriptors outlive this call.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e

        return _graph_from_subprocess(process, stdout, stderr)

    if isinstance(hlos, HloProto):
        return _run_one(hlos)
    return execute(_run_one, hlos, executor, chunksize)
Beispiel #2
0
def to_networkx(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[nx.MultiDiGraph, Iterable[nx.MultiDiGraph]]:
    """Convert one or more Program Graphs to `NetworkX MultiDiGraphs
    <https://networkx.org/documentation/stable/reference/classes/multidigraph.html>`_.

    The conversion is done in two stages: the input is first turned into JSON
    node-link data using :func:`to_json`, and that data is then loaded as a
    directed multigraph.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given,
        in which case it is used for both conversion stages.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when multiple
        inputs are given.

    :return: If a single input is provided, return a single :code:`nx.MultiDiGraph`.
        Else returns an iterable sequence of :code:`nx.MultiDiGraph` instances.

    :raises GraphTransformError: If graph conversion fails.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _to_multidigraph(node_link_data):
        # Program graphs are directed and may have parallel edges.
        return nx_json.node_link_graph(
            node_link_data, multigraph=True, directed=True
        )

    if isinstance(graphs, ProgramGraph):
        single_json = to_json(graphs, timeout=timeout)
        return _to_multidigraph(single_json)

    # Multiple inputs: convert to JSON first, then feed the results through
    # the second conversion stage.
    json_stream = to_json(
        graphs,
        timeout=timeout,
        executor=executor,
        chunksize=chunksize,
    )
    return execute(_to_multidigraph, json_stream, executor, chunksize)
Beispiel #3
0
def to_json(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[JsonDict, Iterable[JsonDict]]:
    """Convert one or more Program Graphs to JSON node-link data.

    Each graph is passed to the :code:`GRAPH2JSON` binary and the output of
    that binary is parsed as JSON.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when multiple
        inputs are given.

    :return: If a single input is provided, return a single JSON dictionary.
        Else returns an iterable sequence of JSON dictionaries.

    :raises GraphTransformError: If graph conversion fails, including when the
        output of the conversion binary is not valid JSON.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _convert(graph: ProgramGraph):
        try:
            raw_output = _run_graph_transform_binary(GRAPH2JSON, graph, timeout)
            return json.loads(raw_output)
        except json.JSONDecodeError as err:
            # Report malformed output using the package's own error type.
            raise GraphTransformError(str(err)) from err

    if not isinstance(graphs, ProgramGraph):
        return execute(_convert, graphs, executor, chunksize)
    return _convert(graphs)
Beispiel #4
0
def to_dot(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[str, Iterable[str]]:
    """Convert one or more Program Graphs to DOT Graph Description Language.

    The result is a DOT source string describing the input graph. It can be
    rendered using the graphviz command line tools, or parsed using
    `pydot <https://pypi.org/project/pydot/>`_.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when multiple
        inputs are given.

    :return: A graphviz dot string when a single input is provided, else an
        iterable sequence of graphviz dot strings.

    :raises GraphTransformError: If graph conversion fails.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _convert(graph: ProgramGraph) -> str:
        # The binary's output is decoded as UTF-8 text.
        dot_bytes = _run_graph_transform_binary(GRAPH2DOT, graph, timeout)
        return dot_bytes.decode("utf-8")

    if not isinstance(graphs, ProgramGraph):
        return execute(_convert, graphs, executor, chunksize)
    return _convert(graphs)
Beispiel #5
0
def from_cpp(
    srcs: Union[str, Iterable[str]],
    copts: Optional[List[str]] = None,
    system_includes: bool = True,
    language: str = "c++",
    version: str = "10",
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from a string of C/C++ code.

    This is a convenience function for generating graphs of simple single-file
    code snippets. For example:

        >>> programl.from_cpp(\"\"\"
        ... #include <stdio.h>
        ...
        ... int main() {
        ...   printf("Hello, ProGraML!");
        ...   return 0;
        ... }
        ... \"\"\")

    This is equivalent to invoking clang with input over stdin:

    .. code-block::

        cat <<EOF | clang -xc++ - -c -o -
        #include <stdio.h>

        int main() {
            printf("Hello, ProGraML!");
            return 0;
        }
        EOF

    For more control over the clang invocation, see :func:`from_clang`.

    :param srcs: A string of C / C++, or an iterable sequence of strings of C /
        C++.

    :param copts: A list of additional command line arguments to pass to clang.
        The list is copied; the caller's list is never modified.

    :param system_includes: Detect and pass :code:`-isystem` arguments to clang
        using the default search path of the system compiler. See
        :func:`get_system_includes()
        <programl.util.py.cc_system_includes.get_system_includes>` for details.

    :param language: The programming language of :code:`srcs`. Must be either
        :code:`c++` or :code:`c`.

    :param version: The version of clang to use. See
        :code:`programl.CLANG_VERSIONS` for a list of available versions.

    :param timeout: The maximum number of seconds to wait for an individual
        clang invocation before raising an error. If multiple :code:`srcs`
        inputs are provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. The executor role is to dispatch
        the execution of the jobs locally/on a cluster/with multithreading
        depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`srcs` is singular, returns a single
        :code:`programl.ProgramGraph` instance. Else returns an iterable over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested compiler version is not
        supported.

    :raises GraphCreationError: If compilation of the input fails.

    :raises TimeoutError: If the specified timeout is reached. The child
        process is killed before the error is raised.
    """
    binary = CLANG2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown clang version: {version}. "
            f"Supported versions: {sorted(CLANG2GRAPH_BINARIES.keys())}"
        )

    # Work on a private copy: extending the caller's list in place would leak
    # the -isystem flags back into the caller and accumulate them over
    # repeated calls.
    copts = list(copts) if copts else []
    if system_includes:
        for directory in get_system_includes():
            copts += ["-isystem", str(directory)]

    # The command line is the same for every input; only stdin differs.
    command = [binary, f"-x{language}", "-"] + copts

    def _run_one(src: str) -> ProgramGraph:
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = process.communicate(src.encode("utf-8"), timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child on timeout. Kill it
            # and drain the pipes so neither the process nor its file
            # descriptors outlive this call.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e

        return _graph_from_subprocess(process, stdout, stderr)

    if isinstance(srcs, str):
        return _run_one(srcs)
    return execute(_run_one, srcs, executor, chunksize)
Beispiel #6
0
def from_llvm_ir(
    irs: Union[str, Iterable[str]],
    timeout: int = 300,
    version: str = "10",
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from a string of LLVM-IR.

    This takes as input one or more LLVM-IR strings as generated by
    :code:`llvm-dis` from a bitcode file, or from :code:`clang` using arguments:
    :code:`-emit-llvm -S`.

    Example usage:

        >>> programl.from_llvm_ir(\"\"\"
        ... source_filename = "-"
        ... target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
        ... target triple = "x86_64-apple-macosx11.0.0"
        ...
        ... ; ...
        ... \"\"\")

    Multiple inputs can be passed in a single invocation to be batched and
    processed in parallel. For example:

        >>> with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        ...     graphs = programl.from_llvm_ir(llvm_ir_strings, executor=executor)

    :param irs: A string of LLVM-IR, or an iterable sequence of LLVM-IR strings.

    :param timeout: The maximum number of seconds to wait for an individual
        graph construction invocation before raising an error. If multiple
        inputs are provided, this timeout is per-input.

    :param version: The version of LLVM to use. See
        :code:`programl.LLVM_VERSIONS` for a list of available versions.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. The executor role is to dispatch
        the execution of the jobs locally/on a cluster/with multithreading
        depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`irs` is a single IR, returns a single
        :code:`programl.ProgramGraph` instance. Else returns an iterable over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested LLVM version is not supported.

    :raises GraphCreationError: If graph construction fails.

    :raises TimeoutError: If the specified timeout is reached. The child
        process is killed before the error is raised.
    """
    binary = LLVM2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown llvm version: {version}. "
            f"Supported versions: {sorted(LLVM2GRAPH_BINARIES.keys())}"
        )

    def _run_one(ir: str) -> ProgramGraph:
        process = subprocess.Popen(
            [binary, "--stdout_fmt=pb"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = process.communicate(ir.encode("utf-8"), timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child on timeout. Kill it
            # and drain the pipes so neither the process nor its file
            # descriptors outlive this call.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e

        return _graph_from_subprocess(process, stdout, stderr)

    if isinstance(irs, str):
        return _run_one(irs)
    return execute(_run_one, irs, executor, chunksize)
Beispiel #7
0
def from_clang(
    args: Union[List[str], Iterable[List[str]]],
    system_includes: bool = True,
    version: str = "10",
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Run clang and construct a Program Graph from the output.

    Example usage:

        >>> programl.from_clang(["/path/to/my/app.c", "-DMY_MACRO=3"])

    This is equivalent to invoking clang as:

    .. code-block::

        clang -c /path/to/my/app.c -DMY_MACRO=3

    Multiple inputs can be passed in a single invocation to be batched and
    processed in parallel. The argument lists must be wrapped in a single
    outer sequence. For example:

        >>> with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        ...     programl.from_clang(
        ...         [
        ...             ["a.cc", "-DMY_MACRO=3"],
        ...             ["b.cpp"],
        ...             ["c.c", "-O3", "-std=c99"],
        ...         ],
        ...         executor=executor,
        ...     )


    :param args: A list of arguments to pass to clang, or an iterable sequence
        of arguments to pass to clang. The input is treated as a single
        invocation only when it is a non-empty :code:`list` whose first
        element is a :code:`str`.

    :param system_includes: Detect and pass :code:`-isystem` arguments to clang
        using the default search path of the system compiler. See
        :func:`get_system_includes()
        <programl.util.py.cc_system_includes.get_system_includes>` for details.

    :param version: The version of clang to use. See
        :code:`programl.CLANG_VERSIONS` for a list of available versions.

    :param timeout: The maximum number of seconds to wait for an individual
        clang invocation before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done()` and :code:`result()`. The executor role is to dispatch
        the execution of the jobs locally/on a cluster/with multithreading
        depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`args` is a single list of arguments, returns a single
        :code:`programl.ProgramGraph` instance. Else returns an iterable over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested compiler version is not
        supported.

    :raises GraphCreationError: If compilation of the input fails.

    :raises TimeoutError: If the specified timeout is reached. The child
        process is killed before the error is raised.
    """
    binary = CLANG2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown clang version: {version}. "
            f"Supported versions: {sorted(CLANG2GRAPH_BINARIES.keys())}"
        )

    # Flags shared by every invocation; appended after the user's arguments.
    extra_copts = []
    if system_includes:
        for directory in get_system_includes():
            extra_copts += ["-isystem", str(directory)]

    def _run_one(one_args: List[str]) -> ProgramGraph:
        process = subprocess.Popen(
            [binary] + one_args + extra_copts,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child on timeout. Kill it
            # and drain the pipes so neither the process nor its file
            # descriptors outlive this call.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e

        return _graph_from_subprocess(process, stdout, stderr)

    # A flat list of strings is one clang command line; anything else is
    # treated as a collection of command lines.
    if isinstance(args, list) and args and isinstance(args[0], str):
        return _run_one(args)
    return execute(_run_one, args, executor, chunksize)