예제 #1
0
def test_ControlFlowGraphFromDotSource_is_valid():
    """Check that the CFG parsed from SIMPLE_C_DOT passes validation."""
    # Control flow graphs are not guaranteed to be valid: they may contain
    # fusible basic blocks, which can happen when the graph is created from
    # unoptimized bytecode. This particular graph is expected to validate.
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    is_valid = graph.ValidateControlFlowGraph()
    assert is_valid
예제 #2
0
def test_BuildFullFlowGraph_edges():
    """Check that the full flow graph contains every expected edge."""
    # Relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Map every node name to the node's index in the graph.
    index_of = {}
    for node, data in full_graph.nodes(data=True):
        index_of[data["name"]] = node

    # (source name, destination name) pairs, grouped by basic block.
    expected_edges = (
        # Block %1.
        ("%1.0", "%1.1"),
        ("%1.1", "%1.2"),
        ("%1.2", "%1.3"),
        ("%1.3", "%1.4"),
        ("%1.4", "%1.5"),
        ("%1.5", "%1.6"),
        ("%1.6", "%7.0"),
        ("%1.6", "%8.0"),
        # Block %7.
        ("%7.0", "%7.1"),
        ("%7.1", "%9.0"),
        # Block %8.
        ("%8.0", "%8.1"),
        ("%8.1", "%9.0"),
        # Block %9.
        ("%9.0", "%9.1"),
    )
    for src_name, dst_name in expected_edges:
        assert full_graph.has_edge(index_of[src_name], index_of[dst_name])
예제 #3
0
def test_BuildFullFlowGraph_num_edges():
    """Check the number of edges in the full flow graph."""
    # Relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    edge_count = full_graph.number_of_edges()
    assert edge_count == 13
예제 #4
0
def test_BuildFullFlowGraph_fizz_buzz():
    """Check that the full flow graph carries the expected graph name."""
    # Relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    graph_name = full_graph.graph["name"]
    assert graph_name == "FizzBuzz"
예제 #5
0
def test_BuildFullFlowGraph_exit_block():
    """Check that node %9.1 is flagged as the exit of the full flow graph."""
    # Relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Map every node name to the node's index in the graph.
    index_of = {}
    for node, data in full_graph.nodes(data=True):
        index_of[data["name"]] = node

    exit_node = index_of["%9.1"]
    assert full_graph.nodes[exit_node]["exit"]
예제 #6
0
def test_ControlFlowGraphFromDotSource_edges():
    """Check that the CFG parsed from SIMPLE_C_DOT has the expected edges."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)

    # Map every node name to the node's index in the graph.
    index_of = {}
    for node in graph.nodes:
        index_of[graph.nodes[node]["name"]] = node
    edge_set = set(graph.edges)

    expected_edges = (
        ("%2", "%9"),
        ("%2", "%12"),
        ("%9", "%18"),
        ("%12", "%18"),
    )
    for src_name, dst_name in expected_edges:
        assert (index_of[src_name], index_of[dst_name]) in edge_set
예제 #7
0
def test_ControlFlowGraphFromDotSource_positions():
    """Test the edge positions of a CFG with a loop containing a switch.

    The dot source below is the control flow graph of this function:

    int A() {
      int n = 0;
      for (int i = 0; i < 10; ++i) {
        switch (n) {
          case 0:
            n += 1;
          case 1:
            n += 2;
          default:
            n += 3;
            break;
        }
      }
      return n;
    }

    The switch block has three outgoing edges, so the set of "position"
    values found on the graph's edges is expected to be {0, 1, 2}.
    """
    # A raw string is required here: the dot labels use the graphviz "\l"
    # line terminator, which is not a valid Python escape sequence and would
    # otherwise trigger an invalid-escape warning.
    cfg = llvm_util.ControlFlowGraphFromDotSource(r"""
digraph "CFG for 'A' function" {
	label="CFG for 'A' function";

	Node0x7ff981522700 [shape=record,label="{%0:\l  %1 = alloca i32, align 4\l  %2 = alloca i32, align 4\l  store i32 0, i32* %1, align 4\l  store i32 0, i32* %2, align 4\l  br label %3\l}"];
	Node0x7ff981522700 -> Node0x7ff981522980;
	Node0x7ff981522980 [shape=record,label="{%3:\l\l  %4 = load i32, i32* %2, align 4\l  %5 = icmp slt i32 %4, 10\l  br i1 %5, label %6, label %21\l|{<s0>T|<s1>F}}"];
	Node0x7ff981522980:s0 -> Node0x7ff981522b50;
	Node0x7ff981522980:s1 -> Node0x7ff981522bd0;
	Node0x7ff981522b50 [shape=record,label="{%6:\l\l  %7 = load i32, i32* %1, align 4\l  switch i32 %7, label %14 [\l    i32 0, label %8\l    i32 1, label %11\l  ]\l|{<s0>def|<s1>0|<s2>1}}"];
	Node0x7ff981522b50:s0 -> Node0x7ff981522b90;
	Node0x7ff981522b50:s1 -> Node0x7ff981522d30;
	Node0x7ff981522b50:s2 -> Node0x7ff981522db0;
	Node0x7ff981522d30 [shape=record,label="{%8:\l\l  %9 = load i32, i32* %1, align 4\l  %10 = add nsw i32 %9, 1\l  store i32 %10, i32* %1, align 4\l  br label %11\l}"];
	Node0x7ff981522d30 -> Node0x7ff981522db0;
	Node0x7ff981522db0 [shape=record,label="{%11:\l\l  %12 = load i32, i32* %1, align 4\l  %13 = add nsw i32 %12, 2\l  store i32 %13, i32* %1, align 4\l  br label %14\l}"];
	Node0x7ff981522db0 -> Node0x7ff981522b90;
	Node0x7ff981522b90 [shape=record,label="{%14:\l\l  %15 = load i32, i32* %1, align 4\l  %16 = add nsw i32 %15, 3\l  store i32 %16, i32* %1, align 4\l  br label %17\l}"];
	Node0x7ff981522b90 -> Node0x7ff981522740;
	Node0x7ff981522740 [shape=record,label="{%17:\l\l  br label %18\l}"];
	Node0x7ff981522740 -> Node0x7ff981522d70;
	Node0x7ff981522d70 [shape=record,label="{%18:\l\l  %19 = load i32, i32* %2, align 4\l  %20 = add nsw i32 %19, 1\l  store i32 %20, i32* %2, align 4\l  br label %3\l}"];
	Node0x7ff981522d70 -> Node0x7ff981522980;
	Node0x7ff981522bd0 [shape=record,label="{%21:\l\l  %22 = load i32, i32* %1, align 4\l  ret i32 %22\l}"];
}
""")
    # Collect the distinct "position" attribute values across all edges.
    positions = {position for _, _, position in cfg.edges(data="position")}
    assert positions == {0, 1, 2}
예제 #8
0
def test_ControlFlowGraphFromDotSource_fizz_buzz():
    """Check name, size and edges of the CFG parsed from FIZZBUZZ_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    assert graph.graph["name"] == "FizzBuzz"
    assert graph.number_of_nodes() == 4
    assert graph.number_of_edges() == 4

    # Map every node name to the node's index in the graph.
    index_of = {}
    for node, data in graph.nodes(data=True):
        index_of[data["name"]] = node

    # The graph must contain each of these (source, destination) edges.
    required_edges = (
        ("%1", "%7"),
        ("%1", "%8"),
        ("%7", "%9"),
        ("%8", "%9"),
    )
    for src_name, dst_name in required_edges:
        assert graph.has_edge(index_of[src_name], index_of[dst_name])
예제 #9
0
    def _Build(self, bytecode: str, tag_hook: llvm_util.TagHook):
        """Build a whole-module graph from bytecode (private implementation).

        Args:
          bytecode: The bytecode to construct the graph from.
          tag_hook: Passed through to llvm_util.ControlFlowGraphFromDotSource()
            for every per-function control flow graph.

        Returns:
          The result of self.ComposeGraphs() applied to the per-function
          graphs and the call graph.
        """
        # Produce the dot sources for the call graph and for every
        # control flow graph, using the opt binary stored on this instance.
        dot_sources = opt_util.DotCallGraphAndControlFlowGraphsFromBytecode(
            bytecode, opt_path=self.opt)
        call_graph_dot, cfg_dots = dot_sources

        # Parse the call graph from its dot source.
        call_graph = cg.CallGraphFromDotSource(call_graph_dot)

        # Parse one control flow graph per dot source.
        control_flow_graphs = []
        for cfg_dot in cfg_dots:
            control_flow_graphs.append(
                llvm_util.ControlFlowGraphFromDotSource(cfg_dot,
                                                        tag_hook=tag_hook))

        # Augment each control flow graph with data flow elements.
        function_graphs = []
        for control_flow_graph in control_flow_graphs:
            function_graphs.append(
                self.CreateControlAndDataFlowUnion(control_flow_graph))

        # Stitch the per-function graphs together into one module graph.
        return self.ComposeGraphs(function_graphs, call_graph)
예제 #10
0
  def Build(
    self,
    bytecode: str,
    opt=None,
    tag_hook: typing.Optional[llvm_util.TagHook] = None,
  ) -> programl_pb2.ProgramGraph:
    """Construct a ProGraML graph from the given bytecode.

    Args:
      bytecode: The bytecode to construct the graph from.
      opt: The path to the LLVM `opt` binary used to produce the control-flow
        and call graphs. It is forwarded as `opt_path`; when None, the choice
        of binary is left to opt_util (presumably the packaged opt - confirm).
      tag_hook: An optional object that can tag specific nodes in the graph
        according to some logic. Forwarded to
        llvm_util.ControlFlowGraphFromDotSource().

    Returns:
      The whole-module graph returned by self.ComposeGraphs().
      NOTE(review): the signature declares programl_pb2.ProgramGraph; confirm
      that ComposeGraphs() returns that type.
    """
    # Produce the dot sources for the call graph and for every control flow
    # graph.
    dot_sources = opt_util.DotCallGraphAndControlFlowGraphsFromBytecode(
      bytecode, opt_path=opt
    )
    call_graph_dot, cfg_dots = dot_sources

    # Parse the call graph from its dot source.
    call_graph = cg.CallGraphFromDotSource(call_graph_dot)

    # Parse one control flow graph per dot source.
    control_flow_graphs = []
    for cfg_dot in cfg_dots:
      control_flow_graphs.append(
        llvm_util.ControlFlowGraphFromDotSource(cfg_dot, tag_hook=tag_hook)
      )

    # Augment each control flow graph with data flow elements.
    function_graphs = []
    for control_flow_graph in control_flow_graphs:
      function_graphs.append(
        self.CreateControlAndDataFlowUnion(control_flow_graph)
      )

    # Stitch the per-function graphs together into one module graph.
    return self.ComposeGraphs(function_graphs, call_graph)
예제 #11
0
def test_BuildFullFlowGraph_node_text():
    """Check the "text" attribute of nodes in the full flow graph."""
    # Relies on ControlFlowGraphFromDotSource() behaving as expected;
    # test_ControlFlowGraphFromDotSource_fizz_buzz() fails if it does not.
    control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(FIZZBUZZ_DOT)
    full_graph = control_flow_graph.BuildFullFlowGraph()

    # Map every node name to the node's index in the graph.
    index_of = {}
    for node, data in full_graph.nodes(data=True):
        index_of[data["name"]] = node

    # (node name, expected statement text) pairs, grouped by basic block.
    expected_text = (
        # Block %1.
        ("%1.0", "%2 = alloca i32, align 4"),
        ("%1.1", "%3 = alloca i32, align 4"),
        ("%1.2", "store i32 %0, i32* %3, align 4"),
        ("%1.3", "%4 = load i32, i32* %3, align 4"),
        ("%1.4", "%5 = srem i32 %4, 15"),
        ("%1.5", "%6 = icmp eq i32 %5, 0"),
        # Note the conditional branch instruction has had the labels stripped.
        ("%1.6", "br i1 %6"),
        # Block %7.
        ("%7.0", "store i32 1, i32* %2, align 4"),
        # Block %8.
        ("%8.0", "store i32 0, i32* %2, align 4"),
        # Block %9.
        ("%9.0", "%10 = load i32, i32* %2, align 4"),
        ("%9.1", "ret i32 %10"),
    )
    for node_name, text in expected_text:
        assert full_graph.nodes[index_of[node_name]]["text"] == text
예제 #12
0
def CreateControlFlowGraphFromOpenClKernel(
        kernel_name: str,
        opencl_kernel: str) -> typing.Optional[cfg.ControlFlowGraph]:
    """Create a control flow graph from an OpenCL kernel.

    The kernel is compiled to bytecode with "-O0", the bytecode is turned into
    dot sources, and the single resulting dot source is parsed into a graph.

    Args:
      kernel_name: The name of the OpenCL kernel defined in opencl_kernel. It is
        stored as the "name" attribute of the returned graph.
      opencl_kernel: A string of OpenCL. This should contain a single kernel
        definition.

    Returns:
      A ControlFlowGraph instance. As written, this function never returns
      None; failures surface as exceptions.

    Raises:
      ClangException: Presumably raised by BytecodeFromOpenClString() if
        compiling to bytecode fails (not visible from this function - confirm).
      ValueError: If the bytecode yields no dot source, or more than one.
    """
    bytecode, _ = BytecodeFromOpenClString(opencl_kernel, "-O0")

    # Extract exactly one dot source from the bytecode. A private sentinel is
    # used so that an exhausted generator is detected explicitly rather than
    # letting StopIteration leak out to the caller.
    dot_generator = opt_util.DotControlFlowGraphsFromBytecode(bytecode)
    exhausted = object()
    dot = next(dot_generator, exhausted)
    if dot is exhausted:
        raise ValueError("Bytecode produced no dot source!")
    if next(dot_generator, exhausted) is not exhausted:
        raise ValueError("Bytecode produced more than one dot source!")

    # Instantiate a CFG from the dot source.
    graph = llvm_util.ControlFlowGraphFromDotSource(dot)

    # Set the name of the graph to the kernel name. This is because the src code
    # has been preprocessed, so that each kernel is named 'A'.
    graph.graph["name"] = kernel_name

    return graph
예제 #13
0
    ],
  )
=======
  scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, [
      '-O1', '-polly-process-unprofitable', '-polly-optimized-scops',
      '-polly-dot', '-polly-optimizer=none'
  ])
>>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py

  # Loop over each function
  max_steps = 0
  cdfgs = []
  for i, graph in enumerate(scop_graphs):
    graph_annotator = PolyhedralRegionAnnotator()
    dot = graph
    cfg = llvm_util.ControlFlowGraphFromDotSource(dot, tag_hook=graph_annotator)
    builder = graph_builder.ProGraMLGraphBuilder()
    annotated_cdfg = builder.BuildFromControlFlowGraph(cfg)

<<<<<<< HEAD:deeplearning/ml4pl/graphs/labelled/dataflow/polyhedra/polyhedra.py
    steps = sum(
      1
      for nid, node in annotated_cdfg.nodes(data=True)
      if node.get("polyhedral")
    )
=======
    steps = sum(1 for nid, node in annotated_cdfg.nodes(data=True)
                if node.get('polyhedral'))
>>>>>>> edb8c21d9... Automated code format.:deeplearning/ml4pl/graphs/labelled/polyhedra/polyhedra.py
    max_steps = max(max_steps, steps)
    cdfgs.append(annotated_cdfg)
예제 #14
0
def test_ControlFlowGraphFromDotSource_invalid_source():
    """Check that unparseable dot source raises pyparsing.ParseException."""
    malformed_dot = "invalid dot source!"
    with test.Raises(pyparsing.ParseException):
        llvm_util.ControlFlowGraphFromDotSource(malformed_dot)
예제 #15
0
def test_ControlFlowGraphFromDotSource_graph_name():
    """Check that the CFG parsed from SIMPLE_C_DOT has the expected name."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    graph_name = graph.graph["name"]
    assert graph_name == "DoSomething"
예제 #16
0
def test_ControlFlowGraphFromDotSource_num_edges():
    """Check the number of edges in the CFG parsed from SIMPLE_C_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)
    edge_count = graph.number_of_edges()
    assert edge_count == 4
예제 #17
0
def test_ControlFlowGraphFromDotSource_node_names():
    """Check the node names of the CFG parsed from SIMPLE_C_DOT."""
    graph = llvm_util.ControlFlowGraphFromDotSource(SIMPLE_C_DOT)

    def numeric_suffix(name):
        # Names look like "%12"; order them by the integer after the "%".
        return int(name[1:])

    names = [graph.nodes[node]["name"] for node in graph.nodes]
    names.sort(key=numeric_suffix)
    assert names == ["%2", "%9", "%12", "%18"]
예제 #18
0
    ClangException: If compiling to bytecode fails.
  """
  graphs = []

  try:
    bytecode, _ = BytecodeFromLinuxSrc(path, "-O0")
  except clang.ClangException:
    return graphs

  # Extract a dot sources from the bytecode.
  dot_generator = opt_util.DotControlFlowGraphsFromBytecode(bytecode)
  while True:
    try:
      dot = next(dot_generator)
      # Instantiate a CFG from the dot source.
      graph = llvm_util.ControlFlowGraphFromDotSource(dot)
      graph.ValidateControlFlowGraph(strict=False)
      graphs.append(graph)
    except (
      UnicodeDecodeError,
      cfg.MalformedControlFlowGraphError,
      ValueError,
      opt.OptException,
      pyparsing.ParseException,
    ):
      pass
    except StopIteration:
      break

  return graphs
예제 #19
0
def MakePolyhedralGraphs(
    bytecode: str,
    n: typing.Optional[int] = None,
    false=False,
    true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
    """Yield one annotated graph for a bytecode that may contain polyhedral
    loops.

    Args:
      bytecode: The bytecode which produced the input graph.
      n: The maximum number of graphs to produce. This value is ignored and
        one graph is produced with all polyhedral regions annotated.
      false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.
      true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This
        method is hardcoded to use 2-class 1-hots.

    Returns:
      A generator that yields a single graph: a copy of the graph built by
      CreateCDFG(), passed through AnnotatePolyhedra(), with a
      'data_flow_max_steps_required' attribute set to the largest number of
      nodes flagged "polyhedral" in any one per-function graph.
      NOTE(review): the labels written onto nodes are decided by
      AnnotatePolyhedra(), which is not visible here.
    """
    # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
    # with a list of class values for all graph annotator functions.
    # The caller-supplied values are deliberately discarded.
    del n, false, true

    # Hardcoded 2-class one-hot label vectors.
    negative_label = np.array([1, 0], np.int64)
    positive_label = np.array([0, 1], np.int64)

    # Canonicalize input graph (see http://polly.llvm.org/docs/Architecture.html)
    bytecode = BytecodeToPollyCanonicalized(bytecode)
    base_graph = CreateCDFG(bytecode)

    # Ask opt for the dot graphs describing the polyhedral building blocks.
    polly_flags = [
        "-O1",
        "-polly-process-unprofitable",
        "-polly-optimized-scops",
        "-polly-dot",
        "-polly-optimizer=none",
    ]
    scop_graphs, _ = opt_util.DotGraphsFromBytecode(bytecode, polly_flags)

    # Build one annotated graph per dot source and track the largest count
    # of nodes flagged as polyhedral.
    max_steps = 0
    annotated_graphs = []
    for scop_dot in scop_graphs:
        annotator = PolyhedralRegionAnnotator()
        control_flow_graph = llvm_util.ControlFlowGraphFromDotSource(
            scop_dot, tag_hook=annotator)
        annotated = graph_builder.ProGraMLGraphBuilder(
        ).BuildFromControlFlowGraph(control_flow_graph)

        polyhedral_count = sum(
            1 for _, attrs in annotated.nodes(data=True)
            if attrs.get("polyhedral"))
        if polyhedral_count > max_steps:
            max_steps = polyhedral_count
        annotated_graphs.append(annotated)

    labelled = base_graph.copy()
    labelled.data_flow_max_steps_required = max_steps
    AnnotatePolyhedra(labelled,
                      annotated_graphs,
                      false=negative_label,
                      true=positive_label)
    yield labelled