def test_every_node_has_x(simple_bytecode: str):
    """Test that every node in the built graph has a list-valued 'x' attribute."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    # Iterating with data="x" yields (node, x-attribute) pairs.
    for node, features in graph.nodes(data="x"):
        is_list = isinstance(features, list)
        assert is_list, f"Invalid x attribute for node {graph.nodes[node]}"
def test_every_edge_has_position(simple_bytecode: str):
    """Test that every edge in the built graph has an integer 'position'."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    # Iterating with data="position" yields (src, dst, position) triples.
    for src, dst, edge_position in graph.edges(data="position"):
        has_int_position = isinstance(edge_position, int)
        assert has_int_position, f'No position for edge {graph.nodes[src]["text"]} -> {graph.nodes[dst]["text"]}'
def CSourceToInputPair(source: str) -> InputPair:
  """Build an InputPair (graph plus bytecode) from a C source string.

  Convenience helper for generating test inputs. The source is first
  converted to bytecode, then a graph is built from that bytecode. A failure
  here indicates that graph construction or clang is broken.

  Args:
    source: The C source code.

  Returns:
    An InputPair holding the built graph and the bytecode it was built from.
  """
  bytecode = CSourceToBytecode(source)
  return InputPair(
      graph=graph_builder.ProGraMLGraphBuilder().Build(bytecode),
      bytecode=bytecode,
  )
Exemple #4
0
def _ProcessInputs(
        bytecode_db: bytecode_database.Database, bytecode_ids: typing.List[int]
) -> typing.List[graph_database.GraphMeta]:
    """Build a graph meta for each of the requested bytecodes.

    The rows for the given IDs are fetched in a single query, the database is
    closed, and a graph is then built for every row. A bytecode whose
    processing raises is logged through app.Error() and skipped.

    Args:
      bytecode_db: The database to read bytecodes from. It is closed as soon
        as the rows have been fetched.
      bytecode_ids: The IDs of the LlvmBytecode rows to process.

    Returns:
      A list of GraphMeta objects, one per bytecode that was processed
      without raising.
    """
    with bytecode_db.Session() as session:
        query = session.query(
            bytecode_database.LlvmBytecode.id,
            bytecode_database.LlvmBytecode.bytecode,
            bytecode_database.LlvmBytecode.source_name,
            bytecode_database.LlvmBytecode.relpath,
            bytecode_database.LlvmBytecode.language,
        )
        rows = query.filter(
            bytecode_database.LlvmBytecode.id.in_(bytecode_ids)).all()
    # Release the connection before the (potentially slow) graph builds.
    bytecode_db.Close()

    builder = graph_builder.ProGraMLGraphBuilder()
    graph_metas = []

    for bytecode_id, bytecode, source_name, relpath, language in rows:
        # Haskell bytecode comes from an older LLVM whose output is
        # incompatible with the default opt, so it needs the older opt binary.
        # Every other language uses the builder's default.
        opt = None
        if language == "haskell":
            opt = "opt-3.5"

        try:
            # The profile message reads `graph` lazily, so the lambda may
            # refer to a name that is only bound inside the with-block.
            with prof.Profile(
                    lambda t:
                    f"Constructed {graph.number_of_nodes()}-node CDFG"):
                graph = builder.Build(bytecode, opt=opt)
            # Attach the provenance of the bytecode to the graph object.
            graph.bytecode_id = bytecode_id
            graph.source_name = source_name
            graph.relpath = relpath
            graph.language = language
            meta = graph_database.GraphMeta.CreateWithNetworkXGraph(graph)
            graph_metas.append(meta)
        except Exception as e:
            # Report the last of (at most) the first two traceback frames.
            frames = traceback.extract_tb(sys.exc_info()[2], 2)
            filename, line_number, function_name, *_ = frames[-1]
            app.Error(
                "Failed to annotate bytecode with id "
                "%d: %s (%s:%s:%s() -> %s)",
                bytecode_id,
                e,
                pathlib.Path(filename).name,
                line_number,
                function_name,
                type(e).__name__,
            )
    return graph_metas
def test_every_statement_has_a_predecessor(simple_bytecode: str):
    """Test that each statement other than an entry block has a control pred."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    entry_blocks = {node for node, _ in nx_utils.EntryBlockIterator(graph)}
    for node, _ in nx_utils.StatementNodeIterator(graph):
        # Falsy node IDs and function entry blocks are exempt from the check.
        if not node or node in entry_blocks:
            continue
        # Only the key-0 edge between each (src, dst) pair is inspected.
        has_control_pred = any(
            graph.edges[src, dst, 0]["flow"] == programl_pb2.Edge.CONTROL
            for src, dst in graph.in_edges(node))
        assert has_control_pred, f"{node} has no control flow predecessor."
def test_ComposeGraphs_undefined():
    """Test that function graph is inserted for call to undefined function.

    Function "A" is given an explicit graph containing a call to "B"; "B"
    appears only in the call graph, so its entry and exit nodes are expected
    to show up in the composed graph.
    """
    builder = graph_builder.ProGraMLGraphBuilder()

    # Function graph for A: entry -> call -> exit.
    caller = nx.MultiDiGraph(name="A")
    caller.entry_block = "A_entry"
    caller.exit_block = "A_exit"
    for block in (caller.entry_block, caller.exit_block):
        caller.add_node(block, type="statement", function="A", text="")
    caller.add_node(
        "call", type="statement", function="A", text="call i32 @B(i32 1)")
    for src, dst in ((caller.entry_block, "call"), ("call", caller.exit_block)):
        caller.add_edge(src, dst, flow="control", function="A")

    # Call graph: both functions are externally reachable, and A calls B.
    call_graph = nx.MultiDiGraph()
    for src, dst in (("external node", "A"), ("external node", "B"),
                     ("A", "B")):
        call_graph.add_edge(src, dst)

    g = builder.ComposeGraphs([caller], call_graph)

    for expected_node in ("root", "B_entry", "B_exit"):
        assert expected_node in g

    # NOTE(review): in networkx the second positional argument of edges() is
    # `data`, not a destination node, so these look like they only check that
    # the first node has outgoing edges. Confirm whether has_edge() was meant.
    assert g.edges("call", "B_entry")
    assert g.edges("B_exit", "call")

    assert g.number_of_nodes() == 6

    assert g.edges("root", "A")
    assert g.edges("root", "B")
def test_that_root_node_is_connected_to_entry_points(simple_bytecode: str):
    """Test that node 0 is the root, with no in-edges and two out-edges."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    root = 0
    assert graph.nodes[root]["text"] == "root"
    assert graph.in_degree(root) == 0
    outgoing = graph.out_edges(root)
    assert len(outgoing) == 2
Exemple #8
0
def builder() -> graph_builder.ProGraMLGraphBuilder:
    """Test fixture providing a fresh graph builder for regression tests."""
    regression_builder = graph_builder.ProGraMLGraphBuilder()
    return regression_builder
Exemple #9
0
def CreateCDFG(bytecode: str) -> nx.MultiDiGraph:
  """Build a graph from the given bytecode using a fresh ProGraML builder.

  Args:
    bytecode: The bytecode to build the graph from.

  Returns:
    The graph produced by the builder.
  """
  cdfg = graph_builder.ProGraMLGraphBuilder().Build(bytecode)
  return cdfg
Exemple #10
0
  )
=======
  scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, [
      '-O1', '-polly-process-unprofitable', '-polly-optimized-scops',
      '-polly-dot', '-polly-optimizer=none'
  ])
>>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py

  # Loop over each function
  max_steps = 0
  cdfgs = []
  for i, graph in enumerate(scop_graphs):
    graph_annotator = PolyhedralRegionAnnotator()
    dot = graph
    cfg = llvm_util.ControlFlowGraphFromDotSource(dot, tag_hook=graph_annotator)
    builder = graph_builder.ProGraMLGraphBuilder()
    annotated_cdfg = builder.BuildFromControlFlowGraph(cfg)

<<<<<<< HEAD:deeplearning/ml4pl/graphs/labelled/dataflow/polyhedra/polyhedra.py
    steps = sum(
      1
      for nid, node in annotated_cdfg.nodes(data=True)
      if node.get("polyhedral")
    )
=======
    steps = sum(1 for nid, node in annotated_cdfg.nodes(data=True)
                if node.get('polyhedral'))
>>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py
    max_steps = max(max_steps, steps)
    cdfgs.append(annotated_cdfg)
Exemple #11
0
def Main():
    """Main entry point.

    Reads bytecode from stdin, builds its graph using the opt given by
    FLAGS.opt, and prints the graph converted to a program graph.
    """
    stdin_bytecode = sys.stdin.read()
    graph = graph_builder.ProGraMLGraphBuilder().Build(stdin_bytecode,
                                                       FLAGS.opt)
    program_graph = programl.NetworkXToProgramGraph(graph)
    print(program_graph)
#from torch_geometric.data import Data, DataLoader, InMemoryDataset
#import torch_geometric

# make this file executable from anywhere
#if __name__ == '__main__':
# Debug script: build graphs for one .ll file with two differently
# configured builders, then rebuild a few times with the default builder.

# Resolve this script's real path and derive the repository root from it by
# cutting the path after the last occurrence of 'ProGraML'.
full_path = os.path.realpath(__file__)
print(full_path)
repo_root = full_path.rsplit('ProGraML', maxsplit=1)[0] + 'ProGraML'
print(repo_root)
# Insert at index 1; index 0 is the script path (or '' in a REPL).
sys.path.insert(1, repo_root)
repo_root = Path(repo_root)

# This import must come after the sys.path change above so that the
# repository's packages are importable.
from deeplearning.ml4pl.graphs.unlabelled.llvm2graph import graph_builder

# One builder with the default opt and one pointed explicitly at the system
# opt binary.
builder = graph_builder.ProGraMLGraphBuilder()  #opt='/usr/bin/opt')
builder7 = graph_builder.ProGraMLGraphBuilder(opt='/usr/bin/opt')

# Input file to debug; the commented-out lines below are alternative inputs.
file_to_debug = '/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/amd_app_sdk/amd/AtomicCounters.ll'
#with open('/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/amd_app_sdk/amd_ocl/AMDAPPSDK-3.0_samples_bolt_BoxFilterSAT_BoxFilterSAT_Kernels.ll', 'r') as f:
#with open('/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/eigen/eigen_matmul_3/eigen_matmul-266.ll_', 'r') as f:
#with open(repo_root / 'deeplearning/ml4pl/poj104' / '71.ll', 'r') as f:
with open(file_to_debug, 'r') as f:
    ll = f.read()

# Build the same input once with each builder.
nx_graph = builder.Build(ll)
nx_graph7 = builder7.Build(ll)

# Rebuild five more times with the default builder, printing a separator
# after each build; the resulting graph is not used.
for i in range(5):
    nn = builder.Build(ll)
    print(f"====== {i} =====")
Exemple #13
0
def MakePolyhedralGraphs(
    bytecode: str,
    n: typing.Optional[int] = None,
    false=False,
    true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
    """Yield a single graph annotated with the bytecode's polyhedral regions.

    Args:
      bytecode: The bytecode to build and annotate a graph for.
      n: The maximum number of graphs to produce. Ignored: exactly one graph
        is yielded, with all polyhedral regions annotated.
      false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. The
        labels are hardcoded 2-class 1-hots.
      true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. The
        labels are hardcoded 2-class 1-hots.

    Returns:
      A generator yielding one annotated graph. Its
      'data_flow_max_steps_required' attribute is the largest per-function
      count of nodes flagged as polyhedral.
    """
    # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
    # with a list of class values for all graph annotator functions.
    del n

    # The caller-supplied labels are discarded in favour of one-hot vectors.
    false = np.array([1, 0], np.int64)
    true = np.array([0, 1], np.int64)

    # Canonicalize the input first
    # (see http://polly.llvm.org/docs/Architecture.html).
    bytecode = BytecodeToPollyCanonicalized(bytecode)
    base_graph = CreateCDFG(bytecode)

    # Obtain one dot graph per function.
    polly_args = [
        "-O1",
        "-polly-process-unprofitable",
        "-polly-optimized-scops",
        "-polly-dot",
        "-polly-optimizer=none",
    ]
    scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, polly_args)

    # Build an annotated graph for each function and track the largest
    # number of polyhedral nodes seen in any one of them.
    max_steps = 0
    cdfgs = []
    for dot_source in scop_graphs:
        region_annotator = PolyhedralRegionAnnotator()
        cfg = llvm_util.ControlFlowGraphFromDotSource(
            dot_source, tag_hook=region_annotator)
        annotated_cdfg = graph_builder.ProGraMLGraphBuilder(
        ).BuildFromControlFlowGraph(cfg)

        polyhedral_count = sum(
            1 for _, attrs in annotated_cdfg.nodes(data=True)
            if attrs.get("polyhedral"))
        if polyhedral_count > max_steps:
            max_steps = polyhedral_count
        cdfgs.append(annotated_cdfg)

    # Annotate a copy so that the graph built above is left untouched.
    labelled = base_graph.copy()
    labelled.data_flow_max_steps_required = max_steps
    AnnotatePolyhedra(labelled, cdfgs, false=false, true=true)
    yield labelled