def test_ControlFlowGraphFromDotSource_is_valid():
    """Check that the CFG built from SIMPLE_C_DOT passes validation."""
    # Validity is not guaranteed for every control flow graph: a graph may
    # contain fusible basic blocks, which can happen when it is created from
    # unoptimized bytecode.
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    is_valid = graph.ValidateControlFlowGraph()
    assert is_valid
def test_BuildFullFlowGraph_edges():
    """Check that the full flow graph of FIZZBUZZ_DOT has the expected edges."""
    # This relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Look up node identifiers by their "name" attribute.
    node_by_name = {}
    for node, attrs in full_graph.nodes(data=True):
        node_by_name[attrs["name"]] = node

    expected_edges = (
        # Block %1.
        ("%1.0", "%1.1"),
        ("%1.1", "%1.2"),
        ("%1.2", "%1.3"),
        ("%1.3", "%1.4"),
        ("%1.4", "%1.5"),
        ("%1.5", "%1.6"),
        ("%1.6", "%7.0"),
        ("%1.6", "%8.0"),
        # Block %7.
        ("%7.0", "%7.1"),
        ("%7.1", "%9.0"),
        # Block %8.
        ("%8.0", "%8.1"),
        ("%8.1", "%9.0"),
        # Block %9.
        ("%9.0", "%9.1"),
    )
    for src_name, dst_name in expected_edges:
        assert full_graph.has_edge(node_by_name[src_name], node_by_name[dst_name])
def _ExtractGraphBatchOrDie(
    src_file_paths: typing.List[pathlib.Path], headers_dir: pathlib.Path
) -> typing.List[typing.Tuple[pathlib.Path, llvm_util.LlvmControlFlowGraph]]:
    """Build one full flow graph for each OpenCL source in a batch.

    Every source file is compiled to bytecode, the control flow graphs are
    extracted from that bytecode, and the single expected graph is expanded
    with BuildFullFlowGraph(). app.Fatal() is called when a source does not
    yield exactly one control flow graph.

    Args:
      src_file_paths: A list of source code paths.
      headers_dir: The directory containing header files.

    Returns:
      A list of <path, graph> tuples in input order, where each graph carries
      the source's bytecode in its "llvm_bytecode" graph property.
    """
    results = []
    for path in src_file_paths:
        app.Log(1, "Compiling %s", path.name)
        bytecode = ncc.ExtractLlvmByteCodeOrDie(path, headers_dir)

        # Materialize every dot source before parsing any of them.
        dots = list(opt_util.DotControlFlowGraphsFromBytecode(bytecode))
        graphs = []
        for dot in dots:
            graphs.append(llvm_util.ControlFlowGraphFromDotSource(dot))

        # Exactly one control flow graph per source file is expected.
        if len(graphs) != 1:
            app.Fatal(
                "Found %d CFGs in %s: %s",
                len(dots),
                path.name,
                [graph.graph["name"] for graph in graphs],
            )

        full_graph = graphs[0].BuildFullFlowGraph()
        # Keep the input bytecode alongside the graph it produced.
        full_graph.graph["llvm_bytecode"] = bytecode
        results.append((path, full_graph))
    return results
def test_BuildFullFlowGraph_num_edges():
    """Check the edge count of the full flow graph of FIZZBUZZ_DOT."""
    # This relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()
    edge_count = full_graph.number_of_edges()
    assert edge_count == 13
def test_BuildFullFlowGraph_fizz_buzz():
    """Check the "name" graph attribute of the full flow graph."""
    # This relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()
    graph_name = full_graph.graph["name"]
    assert graph_name == "FizzBuzz"
def test_BuildFullFlowGraph_exit_block():
    """Check that node "%9.1" of the full flow graph is flagged as an exit."""
    # This relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Look up node identifiers by their "name" attribute.
    node_by_name = {}
    for node, attrs in full_graph.nodes(data=True):
        node_by_name[attrs["name"]] = node

    exit_node = node_by_name["%9.1"]
    assert full_graph.nodes[exit_node]["exit"]
def test_ControlFlowGraphFromDotSource_edges():
    """Check that the CFG built from SIMPLE_C_DOT contains the expected edges."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)

    # Map each node's "name" attribute back to its node identifier.
    index_of = {}
    for node in graph.nodes:
        index_of[graph.nodes[node]["name"]] = node

    actual_edges = set(graph.edges)
    expected_edges = (
        ("%2", "%9"),
        ("%2", "%12"),
        ("%9", "%18"),
        ("%12", "%18"),
    )
    for src_name, dst_name in expected_edges:
        assert (index_of[src_name], index_of[dst_name]) in actual_edges
def test_ControlFlowGraphFromDotSource_positions():
    """Check the "position" attribute of edges in a CFG containing a switch.

    The dot source below is the CFG of a function with a single loop whose
    body is a three-way switch:

      int A() {
        int n = 0;
        for (int i = 0; i < 10; ++i) {
          switch (n) {
            case 0:
              n += 1;
            case 1:
              n += 2;
            default:
              n += 3;
              break;
          }
        }
        return n;
      }

    The set of edge positions found in the graph must be exactly {0, 1, 2}.
    """
    # A raw string is used because the dot labels are full of "\l" sequences,
    # which are not valid Python escapes. The resulting value is unchanged,
    # but the invalid-escape warning is avoided.
    graph = llvm_util.ControlFlowGraphFromDotSource(r""" digraph "CFG for 'A' function" { label="CFG for 'A' function"; Node0x7ff981522700 [shape=record,label="{%0:\l %1 = alloca i32, align 4\l %2 = alloca i32, align 4\l store i32 0, i32* %1, align 4\l store i32 0, i32* %2, align 4\l br label %3\l}"]; Node0x7ff981522700 -> Node0x7ff981522980; Node0x7ff981522980 [shape=record,label="{%3:\l\l %4 = load i32, i32* %2, align 4\l %5 = icmp slt i32 %4, 10\l br i1 %5, label %6, label %21\l|{<s0>T|<s1>F}}"]; Node0x7ff981522980:s0 -> Node0x7ff981522b50; Node0x7ff981522980:s1 -> Node0x7ff981522bd0; Node0x7ff981522b50 [shape=record,label="{%6:\l\l %7 = load i32, i32* %1, align 4\l switch i32 %7, label %14 [\l i32 0, label %8\l i32 1, label %11\l ]\l|{<s0>def|<s1>0|<s2>1}}"]; Node0x7ff981522b50:s0 -> Node0x7ff981522b90; Node0x7ff981522b50:s1 -> Node0x7ff981522d30; Node0x7ff981522b50:s2 -> Node0x7ff981522db0; Node0x7ff981522d30 [shape=record,label="{%8:\l\l %9 = load i32, i32* %1, align 4\l %10 = add nsw i32 %9, 1\l store i32 %10, i32* %1, align 4\l br label %11\l}"]; Node0x7ff981522d30 -> Node0x7ff981522db0; Node0x7ff981522db0 [shape=record,label="{%11:\l\l %12 = load i32, i32* %1, align 4\l %13 = add nsw i32 %12, 2\l store i32 %13, i32* %1, align 4\l br label %14\l}"]; Node0x7ff981522db0 -> Node0x7ff981522b90; Node0x7ff981522b90 [shape=record,label="{%14:\l\l %15 = load i32, i32* %1, align 4\l %16 = add nsw i32 %15, 3\l store i32 %16, i32* %1, align 4\l br label %17\l}"]; Node0x7ff981522b90 -> Node0x7ff981522740; Node0x7ff981522740 [shape=record,label="{%17:\l\l br label %18\l}"]; Node0x7ff981522740 -> Node0x7ff981522d70; Node0x7ff981522d70 [shape=record,label="{%18:\l\l %19 = load i32, i32* %2, align 4\l %20 = add nsw i32 %19, 1\l store i32 %20, 
i32* %2, align 4\l br label %3\l}"]; Node0x7ff981522d70 -> Node0x7ff981522980; Node0x7ff981522bd0 [shape=record,label="{%21:\l\l %22 = load i32, i32* %1, align 4\l ret i32 %22\l}"]; } """)

    # Collect the distinct "position" values over all edges.
    positions = {position for _, _, position in graph.edges(data="position")}
    assert positions == {0, 1, 2}
def test_ControlFlowGraphFromDotSource_fizz_buzz():
    """Check name, size and edges of the CFG built from FIZZBUZZ_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)

    assert graph.graph["name"] == "FizzBuzz"
    assert graph.number_of_nodes() == 4
    assert graph.number_of_edges() == 4

    # Look up node identifiers by their "name" attribute.
    node_by_name = {}
    for node, attrs in graph.nodes(data=True):
        node_by_name[attrs["name"]] = node

    # Every one of these edges must be present in the graph.
    required_edges = (
        ("%1", "%7"),
        ("%1", "%8"),
        ("%7", "%9"),
        ("%8", "%9"),
    )
    for src_name, dst_name in required_edges:
        assert graph.has_edge(node_by_name[src_name], node_by_name[dst_name])
def TryToCreateControlFlowGraphsFromLinuxSrc(
    path: pathlib.Path,
) -> typing.List[cfg.ControlFlowGraph]:
    """Try to create CFGs from a Linux C source file.

    This is a best-effort operation and compile errors are not propagated:
    if compiling the source to bytecode raises clang.ClangException, an empty
    list is returned. Individual graphs that cannot be extracted, parsed or
    validated are skipped, so the result may contain fewer graphs than the
    bytecode has dot sources.

    Args:
      path: The path of the source file.

    Returns:
      A list of ControlFlowGraph instances. The list is empty if compilation
      fails or if no graph could be created.
    """
    try:
        bytecode, _ = BytecodeFromLinuxSrc(path, "-O0")
    except clang.ClangException:
        return []

    graphs = []
    # Extract the dot sources from the bytecode.
    dot_sources = opt_util.DotControlFlowGraphsFromBytecode(bytecode)
    # next() is called inside the try block on purpose: errors raised while
    # producing a dot source are tolerated in the same way as errors raised
    # while parsing or validating it.
    while True:
        try:
            dot = next(dot_sources)
            # Instantiate a CFG from the dot source and check it non-strictly.
            graph = llvm_util.ControlFlowGraphFromDotSource(dot)
            graph.ValidateControlFlowGraph(strict=False)
            graphs.append(graph)
        except (
            UnicodeDecodeError,
            cfg.MalformedControlFlowGraphError,
            ValueError,
            opt.OptException,
            pyparsing.ParseException,
        ):
            # Deliberately skip this graph and carry on with the next one.
            pass
        except StopIteration:
            break
    return graphs
def _Build(self, bytecode: str, tag_hook: llvm_util.TagHook):
    """Private implementation of Build function.

    Obtains the call graph and control flow graph dot sources for the
    bytecode using opt, parses them, adds data flow to each control flow
    graph, and composes the results into a single graph.

    Args:
      bytecode: The bytecode to build a graph from.
      tag_hook: Passed through to ControlFlowGraphFromDotSource() for every
        control flow graph.

    Returns:
      The value returned by ComposeGraphs() for the per-function graphs and
      the call graph.
    """
    # Produce the call graph and control flow graph dot sources using opt.
    dots = opt_util.DotCallGraphAndControlFlowGraphsFromBytecode(
        bytecode, opt_path=self.opt
    )
    call_graph_dot, cfg_dots = dots

    # Parse the call graph from its dot source.
    call_graph = cg.CallGraphFromDotSource(call_graph_dot)

    # Parse every control flow graph before any data flow is added.
    control_flow_graphs = []
    for cfg_dot in cfg_dots:
        control_flow_graphs.append(
            llvm_util.ControlFlowGraphFromDotSource(cfg_dot, tag_hook=tag_hook)
        )

    # Add data flow elements to control flow graphs.
    function_graphs = []
    for control_flow_graph in control_flow_graphs:
        function_graphs.append(
            self.CreateControlAndDataFlowUnion(control_flow_graph)
        )

    # Finally, compose the per-function graphs into a whole-module graph.
    return self.ComposeGraphs(function_graphs, call_graph)
def test_BuildFullFlowGraph_node_text():
    """Check the "text" attribute of the full flow graph's nodes."""
    # This relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Look up node identifiers by their "name" attribute.
    node_by_name = {}
    for node, attrs in full_graph.nodes(data=True):
        node_by_name[attrs["name"]] = node

    expected_text = (
        # Block %1.
        ("%1.0", "%2 = alloca i32, align 4"),
        ("%1.1", "%3 = alloca i32, align 4"),
        ("%1.2", "store i32 %0, i32* %3, align 4"),
        ("%1.3", "%4 = load i32, i32* %3, align 4"),
        ("%1.4", "%5 = srem i32 %4, 15"),
        ("%1.5", "%6 = icmp eq i32 %5, 0"),
        # Note the conditional branch instruction has had the labels stripped.
        ("%1.6", "br i1 %6"),
        # Block %7.
        ("%7.0", "store i32 1, i32* %2, align 4"),
        # Block %8.
        ("%8.0", "store i32 0, i32* %2, align 4"),
        # Block %9.
        ("%9.0", "%10 = load i32, i32* %2, align 4"),
        ("%9.1", "ret i32 %10"),
    )
    for node_name, text in expected_text:
        assert full_graph.nodes[node_by_name[node_name]]["text"] == text
def CreateControlFlowGraphFromOpenClKernel(
    kernel_name: str, opencl_kernel: str
) -> typing.Optional[cfg.ControlFlowGraph]:
    """Create a control flow graph from an OpenCL kernel.

    Args:
      kernel_name: The name of the OpenCL kernel defined in opencl_kernel.
      opencl_kernel: A string of OpenCL. This should contain a single kernel
        definition.

    Returns:
      A ControlFlowGraph instance whose "name" graph property is set to
      kernel_name. Compilation failures are reported by raising (see below),
      not by returning None.

    Raises:
      ClangException: If compiling to bytecode fails.
      ValueError: If the bytecode does not produce exactly one dot source,
        for example because opencl_kernel contains multiple functions.
    """
    bytecode, _ = BytecodeFromOpenClString(opencl_kernel, "-O0")

    # Extract a single dot source from the bytecode. A private sentinel marks
    # exhaustion so that an empty result is reported as ValueError and a bare
    # StopIteration never escapes to the caller.
    exhausted = object()
    dot_sources = opt_util.DotControlFlowGraphsFromBytecode(bytecode)
    dot = next(dot_sources, exhausted)
    if dot is exhausted:
        raise ValueError("Bytecode produced no dot source!")
    if next(dot_sources, exhausted) is not exhausted:
        raise ValueError("Bytecode produced more than one dot source!")

    # Instantiate a CFG from the dot source.
    graph = llvm_util.ControlFlowGraphFromDotSource(dot)

    # Set the name of the graph to the kernel name. This is because the src
    # code has been preprocessed, so that each kernel is named 'A'.
    graph.graph["name"] = kernel_name
    return graph
def MakePolyhedralGraphs(
    bytecode: str,
    n: typing.Optional[int] = None,
    false=False,
    true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
    """Create an annotated graph from a bytecode that potentially contains
    polyhedral loops.

    Args:
      bytecode: The bytecode which produced the input graph.
      n: The maximum number of graphs to produce. This value is ignored and one
        graph will be produced with all polyhedral regions annotated.
      false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.
      true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.

    Returns:
      A generator that yields a single annotated graph. The graph is passed
      through AnnotatePolyhedra() and has a 'data_flow_max_steps_required'
      attribute set to the largest number of nodes flagged "polyhedral" in
      any one of the per-dot annotated graphs.
    """
    # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
    # with a list of class values for all graph annotator functions.
    del n

    # The caller-supplied class values are discarded in favour of 2-class
    # one-hot encodings.
    false = np.array([1, 0], np.int64)
    true = np.array([0, 1], np.int64)

    # Canonicalize input graph (see http://polly.llvm.org/docs/Architecture.html)
    bytecode = BytecodeToPollyCanonicalized(bytecode)
    g = CreateCDFG(bytecode)

    # Build the polyhedral building blocks.
    polly_opt_args = [
        "-O1",
        "-polly-process-unprofitable",
        "-polly-optimized-scops",
        "-polly-dot",
        "-polly-optimizer=none",
    ]
    scop_dots, _ = opt_util.DotGraphsFromBytecode(bytecode, polly_opt_args)

    # Annotate one graph per dot source, tracking the largest count of
    # polyhedral nodes seen in any of them.
    annotated_cdfgs = []
    largest_polyhedral_count = 0
    for scop_dot in scop_dots:
        annotator = PolyhedralRegionAnnotator()
        scop_cfg = llvm_util.ControlFlowGraphFromDotSource(
            scop_dot, tag_hook=annotator
        )
        builder = graph_builder.ProGraMLGraphBuilder()
        annotated = builder.BuildFromControlFlowGraph(scop_cfg)

        polyhedral_count = 0
        for _, attrs in annotated.nodes(data=True):
            if attrs.get("polyhedral"):
                polyhedral_count += 1
        if polyhedral_count > largest_polyhedral_count:
            largest_polyhedral_count = polyhedral_count

        annotated_cdfgs.append(annotated)

    labelled = g.copy()
    labelled.data_flow_max_steps_required = largest_polyhedral_count
    AnnotatePolyhedra(labelled, annotated_cdfgs, false=false, true=true)
    yield labelled
def test_ControlFlowGraphFromDotSource_graph_name():
    """Check the "name" graph attribute of the CFG built from SIMPLE_C_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    graph_name = graph.graph["name"]
    assert graph_name == "DoSomething"
def test_ControlFlowGraphFromDotSource_num_edges():
    """Check the edge count of the CFG built from SIMPLE_C_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    edge_count = graph.number_of_edges()
    assert edge_count == 4
def test_ControlFlowGraphFromDotSource_node_names():
    """Check the node names of the CFG built from SIMPLE_C_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)

    names = []
    for node in graph.nodes:
        names.append(graph.nodes[node]["name"])
    # Order numerically by the integer that follows the leading "%".
    names.sort(key=lambda name: int(name[1:]))

    assert names == ["%2", "%9", "%12", "%18"]
def test_ControlFlowGraphFromDotSource_invalid_source():
    """Check that unparseable dot source raises pyparsing.ParseException."""
    bad_source = "invalid dot source!"
    with test.Raises(pyparsing.ParseException):
        llvm_util.ControlFlowGraphFromDotSource(bad_source)