def from_xla_hlo_proto(
    hlos: Union[HloProto, Iterable[HloProto]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from an XLA HLO protocol buffer.

    :param hlos: A :code:`HloProto`, or an iterable sequence of
        :code:`HloProto` instances.

    :param timeout: The maximum number of seconds to wait for an individual
        graph construction invocation before raising an error. If multiple
        inputs are provided, this timeout is per-input. When the timeout
        expires the converter subprocess is killed before the error is
        raised.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`hlos` is a single input, returns a single
        :code:`programl.ProgramGraph` instance. Else returns a generator over
        :code:`programl.ProgramGraph` instances.

    :raises GraphCreationError: If graph construction fails.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _run_one(hlo: HloProto) -> ProgramGraph:
        # The serialized proto is fed to the converter over stdin; the graph
        # is read back from stdout.
        process = subprocess.Popen(
            [XLA2GRAPH, "--stdin_fmt=pb", "--stdout_fmt=pb"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            stdout, stderr = process.communicate(
                hlo.SerializeToString(), timeout=timeout
            )
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child when the timeout
            # expires. Kill it and drain its pipes so that a hung converter
            # is not leaked for every timed-out input.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e
        return _graph_from_subprocess(process, stdout, stderr)

    # A single proto is converted eagerly; anything else is treated as an
    # iterable of protos and dispatched through execute().
    if isinstance(hlos, HloProto):
        return _run_one(hlos)
    return execute(_run_one, hlos, executor, chunksize)
def to_networkx(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[nx.MultiDiGraph, Iterable[nx.MultiDiGraph]]:
    """Convert one or more Program Graphs to `NetworkX MultiDiGraphs
    <https://networkx.org/documentation/stable/reference/classes/multidigraph.html>`_.

    Each graph is first converted to JSON node-link data with
    :func:`to_json`, and that data is then loaded into NetworkX.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If a single input is provided, return a single
        :code:`nx.MultiDiGraph`. Else returns an iterable sequence of
        :code:`nx.MultiDiGraph` instances.

    :raises GraphTransformError: If graph conversion fails.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _json_to_nx(node_link_data):
        # Load JSON node-link data as a directed multigraph.
        return nx_json.node_link_graph(
            node_link_data, multigraph=True, directed=True
        )

    if not isinstance(graphs, ProgramGraph):
        # Batch path: the JSON conversion and the NetworkX loading are both
        # dispatched with the same executor and chunksize.
        json_stream = to_json(
            graphs, timeout=timeout, executor=executor, chunksize=chunksize
        )
        return execute(_json_to_nx, json_stream, executor, chunksize)

    # Single-graph path: convert eagerly.
    return _json_to_nx(to_json(graphs, timeout=timeout))
def to_json(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[JsonDict, Iterable[JsonDict]]:
    """Convert one or more Program Graphs to JSON node-link data.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If a single input is provided, return a single JSON dictionary.
        Else returns an iterable sequence of JSON dictionaries.

    :raises GraphTransformError: If graph conversion fails, including when
        the transform output cannot be decoded as JSON.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _convert(program_graph: ProgramGraph):
        try:
            raw_json = _run_graph_transform_binary(
                GRAPH2JSON, program_graph, timeout
            )
            return json.loads(raw_json)
        except json.JSONDecodeError as error:
            # Report malformed output as a transform failure.
            raise GraphTransformError(str(error)) from error

    if not isinstance(graphs, ProgramGraph):
        # Anything other than a single graph is treated as an iterable.
        return execute(_convert, graphs, executor, chunksize)
    return _convert(graphs)
def to_dot(
    graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[str, Iterable[str]]:
    """Convert one or more Program Graphs to DOT Graph Description Language.

    The result is a DOT source string describing the input graph. It can be
    rendered using the graphviz command line tools, or parsed using `pydot
    <https://pypi.org/project/pydot/>`_.

    :param graphs: A Program Graph, or a sequence of Program Graphs.

    :param timeout: The maximum number of seconds to wait for an individual
        graph conversion before raising an error. If multiple inputs are
        provided, this timeout is per-input.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: A graphviz dot string when a single input is provided, else an
        iterable sequence of graphviz dot strings.

    :raises GraphTransformError: If graph conversion fails.

    :raises TimeoutError: If the specified timeout is reached.
    """

    def _render(program_graph: ProgramGraph) -> str:
        # The transform output is decoded as UTF-8 text.
        dot_output = _run_graph_transform_binary(
            GRAPH2DOT, program_graph, timeout
        )
        return dot_output.decode("utf-8")

    if not isinstance(graphs, ProgramGraph):
        # Anything other than a single graph is treated as an iterable.
        return execute(_render, graphs, executor, chunksize)
    return _render(graphs)
def from_cpp(
    srcs: Union[str, Iterable[str]],
    copts: Optional[List[str]] = None,
    system_includes: bool = True,
    language: str = "c++",
    version: str = "10",
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from a string of C/C++ code.

    This is a convenience function for generating graphs of simple single-file
    code snippets. For example:

        >>> programl.from_cpp(\"\"\"
        ... #include <stdio.h>
        ...
        ... int main() {
        ...   printf("Hello, ProGraML!");
        ...   return 0;
        ... }
        ... \"\"\")

    This is equivalent to invoking clang with input over stdin:

    .. code-block::

        cat <<EOF | clang -xc++ - -c -o -
        #include <stdio.h>
        int main() {
            printf("Hello, ProGraML!");
            return 0;
        }
        EOF

    For more control over the clang invocation, see :func:`from_clang`.

    :param srcs: A string of C / C++, or an iterable sequence of strings of C /
        C++.

    :param copts: A list of additional command line arguments to pass to
        clang. The list is copied, so the caller's object is never modified.

    :param system_includes: Detect and pass :code:`-isystem` arguments to
        clang using the default search path of the system compiler. See
        :func:`get_system_includes()
        <programl.util.py.cc_system_includes.get_system_includes>` for details.

    :param language: The programming language of :code:`srcs`. Must be either
        :code:`c++` or :code:`c`.

    :param version: The version of clang to use. See
        :code:`programl.CLANG_VERSIONS` for a list of available versions.

    :param timeout: The maximum number of seconds to wait for an individual
        clang invocation before raising an error. If multiple :code:`srcs`
        inputs are provided, this timeout is per-input. When the timeout
        expires the clang subprocess is killed before the error is raised.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`srcs` is singular, returns a single
        :code:`programl.ProgramGraph` instance. Else returns a generator over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested compiler version is not
        supported.

    :raises GraphCreationError: If compilation of the input fails.

    :raises TimeoutError: If the specified timeout is reached.
    """
    # Work on a private copy: the -isystem flags appended below must not leak
    # into the list object owned by the caller.
    copts = list(copts) if copts else []

    binary = CLANG2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown clang version: {version}. "
            f"Supported versions: {sorted(CLANG2GRAPH_BINARIES.keys())}"
        )

    if system_includes:
        for directory in get_system_includes():
            copts += ["-isystem", str(directory)]

    def _run_one(src: str):
        # "-" tells the binary to read the source from stdin.
        process = subprocess.Popen(
            [binary, f"-x{language}", "-"] + copts,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            stdout, stderr = process.communicate(src.encode("utf-8"), timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child when the timeout
            # expires. Kill it and drain its pipes so that a hung compiler
            # is not leaked for every timed-out input.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e
        return _graph_from_subprocess(process, stdout, stderr)

    # A single string is compiled eagerly; anything else is treated as an
    # iterable of sources and dispatched through execute().
    if isinstance(srcs, str):
        return _run_one(srcs)
    return execute(_run_one, srcs, executor, chunksize)
def from_llvm_ir(
    irs: Union[str, Iterable[str]],
    timeout: int = 300,
    version: str = "10",
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Construct a Program Graph from a string of LLVM-IR.

    This takes as input one or more LLVM-IR strings as generated by
    :code:`llvm-dis` from a bitcode file, or from :code:`clang` using arguments:
    :code:`-emit-llvm -S`.

    Example usage:

        >>> programl.from_llvm_ir(\"\"\"
        ... source_filename = "-"
        ... target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
        ... target triple = "x86_64-apple-macosx11.0.0"
        ...
        ... ; ...
        ... \"\"\")

    Multiple inputs can be passed in a single invocation to be batched and
    processed in parallel. For example:

        >>> with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        ...     graphs = programl.from_llvm_ir(llvm_ir_strings, executor=executor)

    :param irs: A string of LLVM-IR, or an iterable sequence of LLVM-IR
        strings.

    :param timeout: The maximum number of seconds to wait for an individual
        graph construction invocation before raising an error. If multiple
        inputs are provided, this timeout is per-input. When the timeout
        expires the converter subprocess is killed before the error is
        raised.

    :param version: The version of LLVM to use. See
        :code:`programl.LLVM_VERSIONS` for a list of available versions.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`irs` is a single IR, returns a single
        :code:`programl.ProgramGraph` instance. Else returns a generator over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested LLVM version is not
        supported.

    :raises GraphCreationError: If graph construction fails.

    :raises TimeoutError: If the specified timeout is reached.
    """
    binary = LLVM2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown llvm version: {version}. "
            f"Supported versions: {sorted(LLVM2GRAPH_BINARIES.keys())}"
        )

    def _run_one(ir: str):
        # The IR text is fed to the converter over stdin.
        process = subprocess.Popen(
            [binary, "--stdout_fmt=pb"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            stdout, stderr = process.communicate(ir.encode("utf-8"), timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child when the timeout
            # expires. Kill it and drain its pipes so that a hung converter
            # is not leaked for every timed-out input.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e
        return _graph_from_subprocess(process, stdout, stderr)

    # A single string is converted eagerly; anything else is treated as an
    # iterable of IR strings and dispatched through execute().
    if isinstance(irs, str):
        return _run_one(irs)
    return execute(_run_one, irs, executor, chunksize)
def from_clang(
    args: Union[List[str], Iterable[List[str]]],
    system_includes: bool = True,
    version: str = "10",
    timeout: int = 300,
    executor: Optional[ExecutorLike] = None,
    chunksize: Optional[int] = None,
) -> Union[ProgramGraph, Iterable[ProgramGraph]]:
    """Run clang and construct a Program Graph from the output.

    Example usage:

        >>> programl.from_clang(["/path/to/my/app.c", "-DMY_MACRO=3"])

    This is equivalent to invoking clang as:

    .. code-block::

        clang -c /path/to/my/app.c -DMY_MACRO=3

    Multiple inputs can be passed in a single invocation to be batched and
    processed in parallel. The inputs must be given as one iterable of
    argument lists. For example:

        >>> with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        ...     programl.from_clang(
        ...         [
        ...             ["a.cc", "-DMY_MACRO=3"],
        ...             ["b.cpp"],
        ...             ["c.c", "-O3", "-std=c99"],
        ...         ],
        ...         executor=executor,
        ...     )

    :param args: A list of arguments to pass to clang, or an iterable sequence
        of arguments to pass to clang. A non-empty list whose first element
        is a string is treated as a single invocation; anything else is
        treated as a sequence of invocations.

    :param system_includes: Detect and pass :code:`-isystem` arguments to
        clang using the default search path of the system compiler. See
        :func:`get_system_includes()
        <programl.util.py.cc_system_includes.get_system_includes>` for details.

    :param version: The version of clang to use. See
        :code:`programl.CLANG_VERSIONS` for a list of available versions.

    :param timeout: The maximum number of seconds to wait for an individual
        clang invocation before raising an error. If multiple inputs are
        provided, this timeout is per-input. When the timeout expires the
        clang subprocess is killed before the error is raised.

    :param executor: An executor object, with method :code:`submit(callable,
        *args, **kwargs)` and returning a Future-like object with methods
        :code:`done() -> bool` and :code:`result() -> float`. The executor role
        is to dispatch the execution of the jobs locally/on a cluster/with
        multithreading depending on the implementation. Eg:
        :code:`concurrent.futures.ThreadPoolExecutor`. Defaults to single
        threaded execution. This is only used when multiple inputs are given.

    :param chunksize: The number of inputs to read and process at a time. A
        larger chunksize improves parallelism but increases memory consumption
        as more inputs must be stored in memory. This is only used when
        multiple inputs are given.

    :return: If :code:`args` is a single list of arguments, returns a single
        :code:`programl.ProgramGraph` instance. Else returns a generator over
        :code:`programl.ProgramGraph` instances.

    :raises UnsupportedCompiler: If the requested compiler version is not
        supported.

    :raises GraphCreationError: If compilation of the input fails.

    :raises TimeoutError: If the specified timeout is reached.
    """
    binary = CLANG2GRAPH_BINARIES.get(version)
    if not binary:
        raise UnsupportedCompiler(
            f"Unknown clang version: {version}. "
            f"Supported versions: {sorted(CLANG2GRAPH_BINARIES.keys())}"
        )

    # The -isystem flags are computed once and shared by every invocation.
    extra_copts = []
    if system_includes:
        for directory in get_system_includes():
            extra_copts += ["-isystem", str(directory)]

    def _run_one(one_args):
        process = subprocess.Popen(
            [binary] + one_args + extra_copts,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # communicate() does not terminate the child when the timeout
            # expires. Kill it and drain its pipes so that a hung compiler
            # is not leaked for every timed-out input.
            process.kill()
            process.communicate()
            raise TimeoutError(str(e)) from e
        return _graph_from_subprocess(process, stdout, stderr)

    # A flat, non-empty list of strings is one clang invocation; everything
    # else (including an empty list) is handled as a batch of invocations.
    if isinstance(args, list) and args and isinstance(args[0], str):
        return _run_one(args)
    return execute(_run_one, args, executor, chunksize)