def test_every_node_has_x(simple_bytecode: str):
    """Test that every node of the built graph has a list-valued 'x' attribute."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    for node, x in graph.nodes(data="x"):
        is_list = isinstance(x, list)
        assert is_list, f"Invalid x attribute for node {graph.nodes[node]}"
def test_every_edge_has_position(simple_bytecode: str):
    """Test that every edge of the built graph has an integer 'position'."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    for src, dst, edge_position in graph.edges(data="position"):
        has_position = isinstance(edge_position, int)
        assert (
            has_position
        ), f'No position for edge {graph.nodes[src]["text"]} -> {graph.nodes[dst]["text"]}'
def CSourceToInputPair(source: str) -> InputPair:
    """Produce a (graph, bytecode) input pair from a C source string.

    Convenience helper for generating test inputs. The source is first
    converted to bytecode, and a graph is then built from that bytecode. A
    failure here means that either clang or graph construction is broken.

    Args:
      source: The C source code to convert.

    Returns:
      An InputPair holding the built graph and the bytecode it was built from.
    """
    bytecode = CSourceToBytecode(source)
    graph = graph_builder.ProGraMLGraphBuilder().Build(bytecode)
    return InputPair(graph=graph, bytecode=bytecode)
def _ProcessInputs(
    bytecode_db: bytecode_database.Database, bytecode_ids: typing.List[int]
) -> typing.List[graph_database.GraphMeta]:
    """Build a graph for each of the requested bytecodes.

    Bytecodes whose processing raises an exception are logged with
    app.Error() and left out of the result; they do not abort the batch.

    Args:
      bytecode_db: The database to read bytecodes from. It is closed once the
        rows have been fetched.
      bytecode_ids: The IDs of the LlvmBytecode rows to process.

    Returns:
      A list of GraphMeta objects, one per successfully processed bytecode.
    """
    with bytecode_db.Session() as session:
        query = session.query(
            bytecode_database.LlvmBytecode.id,
            bytecode_database.LlvmBytecode.bytecode,
            bytecode_database.LlvmBytecode.source_name,
            bytecode_database.LlvmBytecode.relpath,
            bytecode_database.LlvmBytecode.language,
        )
        jobs = query.filter(
            bytecode_database.LlvmBytecode.id.in_(bytecode_ids)
        ).all()
    # Don't leave the database connection lying around.
    bytecode_db.Close()

    builder = graph_builder.ProGraMLGraphBuilder()

    graph_metas = []
    for bytecode_id, bytecode, source_name, relpath, language in jobs:
        # Haskell uses an older version of LLVM which emits incompatible
        # bytecode, so Haskell inputs must go through the older opt. For
        # everything else the default version is fine.
        if language == "haskell":
            opt = "opt-3.5"
        else:
            opt = None

        try:
            # The profile message is a callable that reads `graph` lazily, so
            # the local must keep this name.
            with prof.Profile(
                lambda t: f"Constructed {graph.number_of_nodes()}-node CDFG"
            ):
                graph = builder.Build(bytecode, opt=opt)
            graph.bytecode_id = bytecode_id
            graph.source_name = source_name
            graph.relpath = relpath
            graph.language = language
            meta = graph_database.GraphMeta.CreateWithNetworkXGraph(graph)
            graph_metas.append(meta)
        except Exception as e:
            # Report where the failure came from, using the last of (at most)
            # the first two traceback entries.
            frames = traceback.extract_tb(sys.exc_info()[2], 2)
            filename, line_number, function_name, *_ = frames[-1]
            app.Error(
                "Failed to annotate bytecode with id "
                "%d: %s (%s:%s:%s() -> %s)",
                bytecode_id,
                e,
                pathlib.Path(filename).name,
                line_number,
                function_name,
                type(e).__name__,
            )
    return graph_metas
def test_every_statement_has_a_predecessor(simple_bytecode: str):
    """Test that every statement (except entry blocks) has a control pred."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    entry_blocks = {node for node, _ in nx_utils.EntryBlockIterator(graph)}
    for node, _ in nx_utils.StatementNodeIterator(graph):
        # Falsy node IDs and entry blocks are exempt from the check.
        if not node or node in entry_blocks:
            continue
        # Only the edge with key 0 between each (src, node) pair is examined.
        has_control_predecessor = any(
            graph.edges[src, dst, 0]["flow"] == programl_pb2.Edge.CONTROL
            for src, dst in graph.in_edges(node)
        )
        assert has_control_predecessor, f"{node} has no control flow predecessor."
def test_ComposeGraphs_undefined():
    """Test that function graph is inserted for call to undefined function."""
    builder = graph_builder.ProGraMLGraphBuilder()

    # Function A: entry -> call to @B -> exit. No graph is supplied for B.
    A = nx.MultiDiGraph(name="A")
    A.entry_block = "A_entry"
    A.exit_block = "A_exit"
    for block in (A.entry_block, A.exit_block):
        A.add_node(block, type="statement", function="A", text="")
    A.add_node("call", type="statement", function="A", text="call i32 @B(i32 1)")
    for src, dst in ((A.entry_block, "call"), ("call", A.exit_block)):
        A.add_edge(src, dst, flow="control", function="A")

    # The call graph knows about both A and B, and that A calls B.
    call_graph = nx.MultiDiGraph()
    call_graph.add_edges_from(
        [("external node", "A"), ("external node", "B"), ("A", "B")]
    )

    g = builder.ComposeGraphs([A], call_graph)

    for expected_node in ("root", "B_entry", "B_exit"):
        assert expected_node in g

    # NOTE(review): in networkx the second positional argument of edges() is
    # `data`, not a target node, so the edges(u, v) assertions below appear to
    # check only that `u` has at least one outgoing edge. Confirm whether
    # has_edge(u, v) was intended.
    assert g.edges("call", "B_entry")
    assert g.edges("B_exit", "call")

    assert g.number_of_nodes() == 6

    assert g.edges("root", "A")
    assert g.edges("root", "B")
def test_that_root_node_is_connected_to_entry_points(simple_bytecode: str):
    """Test that node 0 is the root, with no in-edges and two out-edges."""
    graph = graph_builder.ProGraMLGraphBuilder().Build(simple_bytecode)
    root = 0
    assert graph.nodes[root]["text"] == "root"
    assert graph.in_degree(root) == 0
    root_out_edges = graph.out_edges(root)
    assert len(root_out_edges) == 2
def builder() -> graph_builder.ProGraMLGraphBuilder:
    """Test fixture that provides a fresh graph builder for regression tests."""
    regression_builder = graph_builder.ProGraMLGraphBuilder()
    return regression_builder
def CreateCDFG(bytecode: str) -> nx.MultiDiGraph:
    """Build a graph from bytecode using a fresh ProGraMLGraphBuilder.

    Args:
      bytecode: The bytecode to build the graph from.

    Returns:
      The graph returned by the builder's Build() method.
    """
    return graph_builder.ProGraMLGraphBuilder().Build(bytecode)
) ======= scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, [ '-O1', '-polly-process-unprofitable', '-polly-optimized-scops', '-polly-dot', '-polly-optimizer=none' ]) >>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py # Loop over each function max_steps = 0 cdfgs = [] for i, graph in enumerate(scop_graphs): graph_annotator = PolyhedralRegionAnnotator() dot = graph cfg = llvm_util.ControlFlowGraphFromDotSource(dot, tag_hook=graph_annotator) builder = graph_builder.ProGraMLGraphBuilder() annotated_cdfg = builder.BuildFromControlFlowGraph(cfg) <<<<<<< HEAD:deeplearning/ml4pl/graphs/labelled/dataflow/polyhedra/polyhedra.py steps = sum( 1 for nid, node in annotated_cdfg.nodes(data=True) if node.get("polyhedral") ) ======= steps = sum(1 for nid, node in annotated_cdfg.nodes(data=True) if node.get('polyhedral')) >>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py max_steps = max(max_steps, steps) cdfgs.append(annotated_cdfg)
def Main():
    """Main entry point.

    Reads bytecode from stdin, builds its graph using the opt given by
    FLAGS.opt, and prints the graph converted to a program graph.
    """
    stdin_bytecode = sys.stdin.read()
    graph = graph_builder.ProGraMLGraphBuilder().Build(stdin_bytecode, FLAGS.opt)
    print(programl.NetworkXToProgramGraph(graph))
#from torch_geometric.data import Data, DataLoader, InMemoryDataset #import torch_geometric # make this file executable from anywhere #if __name__ == '__main__': full_path = os.path.realpath(__file__) print(full_path) repo_root = full_path.rsplit('ProGraML', maxsplit=1)[0] + 'ProGraML' print(repo_root) #insert at 1, 0 is the script path (or '' in REPL) sys.path.insert(1, repo_root) repo_root = Path(repo_root) from deeplearning.ml4pl.graphs.unlabelled.llvm2graph import graph_builder builder = graph_builder.ProGraMLGraphBuilder() #opt='/usr/bin/opt') builder7 = graph_builder.ProGraMLGraphBuilder(opt='/usr/bin/opt') file_to_debug = '/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/amd_app_sdk/amd/AtomicCounters.ll' #with open('/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/amd_app_sdk/amd_ocl/AMDAPPSDK-3.0_samples_bolt_BoxFilterSAT_BoxFilterSAT_Kernels.ll', 'r') as f: #with open('/mnt/data/llvm/master_thesis_datasets/unsupervised_ncc_data/eigen/eigen_matmul_3/eigen_matmul-266.ll_', 'r') as f: #with open(repo_root / 'deeplearning/ml4pl/poj104' / '71.ll', 'r') as f: with open(file_to_debug, 'r') as f: ll = f.read() nx_graph = builder.Build(ll) nx_graph7 = builder7.Build(ll) for i in range(5): nn = builder.Build(ll) print(f"====== {i} =====")
def MakePolyhedralGraphs(
    bytecode: str,
    n: typing.Optional[int] = None,
    false=False,
    true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
    """Create an annotated graph from a bytecode that potentially contains
    polyhedral loops.

    Args:
      bytecode: The bytecode which produced the input graph.
      n: The maximum number of graphs to produce. This value is ignored and one
        graph will be produced with all polyhedral regions annotated.
      false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.
      true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.

    Returns:
      A generator of annotated graphs, where each graph has 'x' and 'y' labels
      on the statement nodes, and additionally a 'data_flow_max_steps_required'
      attribute which is set to the largest number of statements in a
      polyhedral block.
    """
    # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
    # with a list of class values for all graph annotator functions.
    del false
    del true
    del n

    # One-hot encoding
    false = np.array([1, 0], np.int64)
    true = np.array([0, 1], np.int64)

    # Canonicalize input graph
    # (see http://polly.llvm.org/docs/Architecture.html)
    bytecode = BytecodeToPollyCanonicalized(bytecode)
    g = CreateCDFG(bytecode)

    # Build the polyhedral building blocks
    polly_opt_args = [
        "-O1",
        "-polly-process-unprofitable",
        "-polly-optimized-scops",
        "-polly-dot",
        "-polly-optimizer=none",
    ]
    scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, polly_opt_args)

    # Loop over each function, tracking the largest polyhedral node count.
    max_steps = 0
    cdfgs = []
    for dot in scop_graphs:
        annotator = PolyhedralRegionAnnotator()
        cfg = llvm_util.ControlFlowGraphFromDotSource(dot, tag_hook=annotator)
        annotated_cdfg = graph_builder.ProGraMLGraphBuilder().BuildFromControlFlowGraph(
            cfg
        )
        polyhedral_node_count = sum(
            bool(attrs.get("polyhedral"))
            for _, attrs in annotated_cdfg.nodes(data=True)
        )
        if polyhedral_node_count > max_steps:
            max_steps = polyhedral_node_count
        cdfgs.append(annotated_cdfg)

    labelled = g.copy()
    labelled.data_flow_max_steps_required = max_steps
    AnnotatePolyhedra(labelled, cdfgs, false=false, true=true)
    yield labelled