def test_get_function_cfg_on_ir(self):
    """The CFG of an IR-level function lists instructions unless show_inst=False."""
    expected_inst = "%.5 = add i32 %.1, %.2"
    ir_func = self.build_ir_module().get_global('foo')

    # Default call versus an explicit request to hide instructions.
    with_insts = llvm.get_function_cfg(ir_func)
    without_insts = llvm.get_function_cfg(ir_func, show_inst=False)

    self.assertIn(expected_inst, with_insts)
    self.assertNotIn(expected_inst, without_insts)
def test_function_cfg_on_llvm_value(self):
    """The CFG of a parsed function is a titled digraph; show_inst toggles instructions."""
    sum_func = self.module().get_function('sum')
    with_insts = llvm.get_function_cfg(sum_func, show_inst=True)
    without_insts = llvm.get_function_cfg(sum_func, show_inst=False)

    # Both renderings must carry the "digraph" keyword and the function title.
    for common_text in ('digraph', "CFG for 'sum' function"):
        self.assertIn(common_text, with_insts)
        self.assertIn(common_text, without_insts)

    # Only the instruction-showing rendering contains the add instruction.
    add_inst = "%.3 = add i32 %.1, %.2"
    self.assertIn(add_inst, with_insts)
    self.assertNotIn(add_inst, without_insts)
def __init__(self, cres, name, py_func, **kwargs):
    """
    Store the compile result, function name and Python function, and cache
    the DOT text of the control-flow graph of the function called *name*.

    Extra keyword arguments are kept verbatim in ``self.kwargs``.
    """
    self.cres, self.name, self.py_func = cres, name, py_func
    # Look the function up in the compile result and ask LLVM for its CFG.
    self.dot = ll.get_function_cfg(cres.get_function(name))
    self.kwargs = kwargs
def get_function_cfg(self, name):
    """
    Return the control-flow graph of the LLVM function called *name*,
    wrapped in a ``_CFG`` object.
    """
    return _CFG(ll.get_function_cfg(self.get_function(name)))
def execute(ir_mod):
    """
    Optimise and JIT-compile *ir_mod*, print ``entry_fib`` for 0..11, then
    display the control-flow graph of ``fib_more``.
    """
    from ctypes import CFUNCTYPE, c_int

    llvm.initialize()
    llvm.initialize_native_target()
    llvm.initialize_native_asmprinter()

    parsed_module = llvm.parse_assembly(str(ir_mod))

    # Run the opt-level-1 module pipeline and show the result.
    print('optimized'.center(80, '-'))
    builder = llvm.create_pass_manager_builder()
    builder.opt_level = 1
    module_passes = llvm.create_module_pass_manager()
    builder.populate(module_passes)
    module_passes.run(parsed_module)
    print(parsed_module)

    machine = llvm.Target.from_default_triple().create_target_machine()

    with llvm.create_mcjit_compiler(parsed_module, machine) as engine:
        engine.finalize_object()
        address = engine.get_function_address("entry_fib")
        native_fib = CFUNCTYPE(c_int, c_int)(address)

        # Exercise the compiled function.
        for arg in range(12):
            print('fib({}) = {}'.format(arg, native_fib(arg)))

        # Show the CFG of fib_more.
        fib_more_cfg = llvm.get_function_cfg(
            parsed_module.get_function('fib_more'))
        llvm.view_dot_graph(fib_more_cfg, view=True)
def get_function_cfg(self, name):
    """
    Return the control-flow graph of the LLVM function called *name*,
    wrapped in a ``_CFG`` object.

    ``self._sentry_cache_disable_inspection()`` is invoked first, before
    the function is looked up.
    """
    self._sentry_cache_disable_inspection()
    llvm_func = self.get_function(name)
    dot_text = ll.get_function_cfg(llvm_func)
    cfg = _CFG(dot_text)
    return cfg
def graph(module):
    """
    Render the control-flow graph of every function in *module* as a PNG
    under the ``graphs`` directory.

    *module* is stringified and parsed as LLVM assembly. The return value is
    a list holding the result of each ``render`` call, in function order.
    """
    parsed = llvm.parse_assembly(str(module))
    return [
        llvm.view_dot_graph(llvm.get_function_cfg(fn), view=False).render(
            format='png', directory="graphs")
        for fn in parsed.functions
    ]
def __get_cfg_all(self, path):
    """
    Build one networkx graph combining the CFGs of all functions in a file.

    :param path: path of a textual LLVM IR file.
    :return: the result of ``nx.compose_all`` over the per-function graphs,
        each built from the function's DOT text (instructions shown).
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as f:
        llvm_ir = f.read()

    mod = llvm.parse_assembly(llvm_ir)
    graphs = []
    for function in mod.functions:
        cfg = llvm.get_function_cfg(function, show_inst=True)
        # graph_from_dot_data returns a list; the first entry is used.
        p = pydot.graph_from_dot_data(cfg)
        graphs.append(nx.nx_pydot.from_pydot(p[0]))
    return nx.compose_all(graphs)
def __get_cfg_main(self, path):
    """
    Build a networkx graph of the CFG of the ``main`` function in a file.

    :param path: path of a textual LLVM IR file.
    :return: the graph built from ``main``'s DOT text (instructions shown),
        or ``None`` when the module has no function named ``main``.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as f:
        llvm_ir = f.read()

    mod = llvm.parse_assembly(llvm_ir)
    for function in mod.functions:
        if function.name == "main":
            cfg = llvm.get_function_cfg(function, show_inst=True)
            # graph_from_dot_data returns a list; the first entry is used.
            p = pydot.graph_from_dot_data(cfg)
            return nx.nx_pydot.from_pydot(p[0])
    # No "main" found: return None explicitly instead of touching an
    # unbound ``cfg``.
    return None
def exitProgram(self, ctx):
    """
    Generate LLVM IR for the whole program, optionally optimise it, and
    emit the IR, the assembly and (on request) CFG graphs.

    The ASTs stored in ``self.prop`` for each child of *ctx* are collected
    into a ``ProgramAST`` and code-generated. Files written: ``output.ll``,
    ``output_opt.ll`` (only when optimising) and ``output.asm``. The user is
    prompted on stdin for the optimisation and CFG choices.
    """
    print "* Target cpu: " + llvm.get_host_cpu_name()
    programAst = ProgramAST()
    # Gather the AST recorded for each child node of ctx.
    for child in ctx.getChildren():
        child_ast = self.prop[child]
        programAst.asts.append(child_ast)
    # codeGenerate returns the IR module plus the functions to graph later.
    mod, cfg_list = programAst.codeGenerate(self.var_ptr_symbolTBL)
    strmod = str(mod)
    print "=== Generated IR code ===\n"
    print strmod
    with open("output.ll", 'w') as f:
        f.write(strmod)
    llmod = llvm.parse_assembly(strmod)
    answer = raw_input('* Optimizing this code? (y/n): ')
    if answer.lower() == "y":
        opt = True
    else:
        opt = False
    if opt:
        pm = llvm.create_module_pass_manager()
        pmb = llvm.create_pass_manager_builder()
        pmb.opt_level = 3  # -O3
        pmb.populate(pm)
        # optimize
        pm.run(llmod)
        print "=== Generated optimized IR code ===\n"
        print llmod
        with open("output_opt.ll", 'w') as f:
            f.write(str(llmod))
    llmod.verify()
    # JIT with self.tm, then print and save the assembly for the module.
    with llvm.create_mcjit_compiler(llmod, self.tm) as ee:
        ee.finalize_object()
        print "=== Generated assembly code ===\n"
        print(self.tm.emit_assembly(llmod))
        with open("output.asm", 'w') as f:
            f.write(self.tm.emit_assembly(llmod))
    answer = raw_input('Do you want to create CFG Graph? (y/n) : ')
    if answer.lower() == 'y':
        # One DOT graph per entry of cfg_list, named after that entry.
        for cfg in cfg_list:
            dot = llvm.get_function_cfg(cfg)
            llvm.view_dot_graph(dot ,filename=cfg.name,view = True)
def exitProgram(self, ctx):
    """
    Generate LLVM IR for the whole program, optionally optimise it, and
    emit the IR, the assembly and (on request) CFG graphs.

    The ASTs stored in ``self.prop`` for each child of *ctx* are collected
    into a ``ProgramAST`` and code-generated. Files written: ``output.ll``,
    ``output_opt.ll`` (only when optimising) and ``output.asm``. The user is
    prompted on stdin for the optimisation and CFG choices.
    """
    print "* Target cpu: " + llvm.get_host_cpu_name()
    programAst = ProgramAST()
    # Gather the AST recorded for each child node of ctx.
    for child in ctx.getChildren():
        child_ast = self.prop[child]
        programAst.asts.append(child_ast)
    # codeGenerate returns the IR module plus the functions to graph later.
    mod, cfg_list = programAst.codeGenerate(self.var_ptr_symbolTBL)
    strmod = str(mod)
    print "=== Generated IR code ===\n"
    print strmod
    with open("output.ll", 'w') as f:
        f.write(strmod)
    llmod = llvm.parse_assembly(strmod)
    answer = raw_input('* Optimizing this code? (y/n): ')
    if answer.lower() == "y":
        opt = True
    else:
        opt = False
    if opt:
        pm = llvm.create_module_pass_manager()
        pmb = llvm.create_pass_manager_builder()
        pmb.opt_level = 3  # -O3
        pmb.populate(pm)
        # optimize
        pm.run(llmod)
        print "=== Generated optimized IR code ===\n"
        print llmod
        with open("output_opt.ll", 'w') as f:
            f.write(str(llmod))
    llmod.verify()
    # JIT with self.tm, then print and save the assembly for the module.
    with llvm.create_mcjit_compiler(llmod, self.tm) as ee:
        ee.finalize_object()
        print "=== Generated assembly code ===\n"
        print(self.tm.emit_assembly(llmod))
        with open("output.asm", 'w') as f:
            f.write(self.tm.emit_assembly(llmod))
    answer = raw_input('Do you want to create CFG Graph? (y/n) : ')
    if answer.lower() == 'y':
        # One DOT graph per entry of cfg_list, named after that entry.
        for cfg in cfg_list:
            dot = llvm.get_function_cfg(cfg)
            llvm.view_dot_graph(dot, filename=cfg.name, view=True)
def set_blocknames(kfunction):
    """
    Name each block of *kfunction* after the first ``%<digits>`` token found
    in the corresponding CFG node label.

    The CFG of ``kfunction.valueref`` is also written as a PNG named after
    ``kfunction.functionname``. Blocks and nodes are paired by position.
    """
    dot_text = llvm.get_function_cfg(kfunction.valueref)
    dot_graph = graph_from_dot_data(dot_text)[0]
    dot_graph.write_png("/libx32/llvmlite/" + kfunction.functionname + ".png")

    # One "label %N" entry per node attribute whose key is "label".
    names = [
        "label " + re.findall(r"%\d+", attr_value)[0]
        for node in dot_graph.get_nodes()
        for attr_key, attr_value in node.get_attributes().items()
        if attr_key == "label"
    ]

    for position, block in enumerate(kfunction.blocks):
        block.name = names[position]
def main(bv: BinaryView):
    """
    Lift the ``target`` function of *bv* to LLVM IR, dump the optimised CFG
    to ``output.opt.dot``, then JIT the module and print ``target_0(10)``.

    :param bv: view providing ``get_symbols_by_name`` and ``get_function_at``.
    """
    # Lift the `target` function to IR
    module = ir.Module(name=__file__)
    f: Function = bv.get_function_at(
        bv.get_symbols_by_name('target')[0].address)
    lifter = FunctionLifter(module, f)
    lifter.run()

    # Output the optimized IR to a CFG
    opt_module: llvm.ModuleRef = lifter.optimize(3)
    opt_target = opt_module.get_function(f.name)
    dot = llvm.get_function_cfg(opt_target)
    # The context manager flushes and closes the file deterministically.
    with open('output.opt.dot', 'w') as dot_file:
        dot_file.write(dot)

    # Execute the LLVM IR
    engine = create_execution_engine()
    mod = compile_ir(engine, opt_module)
    func_ptr = engine.get_function_address("target_0")
    cfunc = CFUNCTYPE(c_int64, c_int)(func_ptr)
    print(cfunc(10))
def pretty_printer(self, filename=None, view=None, highlight=True,
                   interleave=False, strip_ir=False, show_key=True,
                   fontsize=10):
    """
    "Pretty" prints the DOT graph of the CFG.

    For explanation of the parameters see the docstring for
    numba.core.dispatcher::inspect_cfg. As used here:

    * filename, view: if either is not None the graph is also rendered to
      PDF via ``Digraph.render(filename=..., view=...)``.
    * highlight: bool, dict or set selecting which of ``incref``,
      ``decref``, ``returns``, ``raises``, ``meminfo`` and ``branches`` get
      a background colour.
    * interleave: bool, dict or set selecting ``python`` and/or
      ``lineinfo`` rows to inject between LLVM lines.
    * strip_ir: when true, only LLVM lines that received a highlight colour
      are kept in the block body.
    * show_key: when true, a "Key" node listing the colour scheme is added.
    * fontsize: node font size passed to graphviz.

    Returns the result of ``Digraph.pipe(format='svg')``.

    Raises ValueError if ``highlight``/``interleave`` is of an unsupported
    type, names an unknown key, or maps a key to a non-bool value.
    """
    import graphviz as gv
    import re
    import json
    import inspect
    import html
    import textwrap
    from llvmlite import binding as ll
    from numba.typed import List
    from types import SimpleNamespace
    from collections import defaultdict

    _default = False
    _highlight = SimpleNamespace(incref=_default,
                                 decref=_default,
                                 returns=_default,
                                 raises=_default,
                                 meminfo=_default,
                                 branches=_default)
    _interleave = SimpleNamespace(python=_default, lineinfo=_default)

    def parse_config(_config, kwarg):
        """ Parses the kwarg into a consistent format for use in configuring
        the Digraph rendering. _config is the configuration instance to
        update, kwarg is the kwarg on which to base the updates.
        """
        if isinstance(kwarg, bool):
            # A bool switches every option on or off.
            for attr in _config.__dict__:
                setattr(_config, attr, kwarg)
        elif isinstance(kwarg, dict):
            # A dict sets named options to explicit bool values.
            for k, v in kwarg.items():
                if k not in _config.__dict__:
                    raise ValueError("Unexpected key in kwarg: %s" % k)
                if isinstance(v, bool):
                    setattr(_config, k, v)
                else:
                    msg = "Unexpected value for key: %s, got:%s"
                    raise ValueError(msg % (k, v))
        elif isinstance(kwarg, set):
            # A set switches the named options on.
            for item in kwarg:
                if item not in _config.__dict__:
                    raise ValueError("Unexpected key in kwarg: %s" % item)
                else:
                    setattr(_config, item, True)
        else:
            msg = "Unhandled configuration type for kwarg %s"
            raise ValueError(msg % type(kwarg))

    parse_config(_highlight, highlight)
    parse_config(_interleave, interleave)

    # This is the colour scheme. The graphviz HTML label renderer only takes
    # names for colours: https://www.graphviz.org/doc/info/shapes.html#html
    cs = defaultdict(lambda: 'white')  # default bg colour is white
    cs['marker'] = 'orange'
    cs['python'] = 'yellow'
    cs['truebr'] = 'green'
    cs['falsebr'] = 'red'
    cs['incref'] = 'cyan'
    cs['decref'] = 'turquoise'
    cs['raise'] = 'lightpink'
    cs['meminfo'] = 'lightseagreen'
    cs['return'] = 'purple'

    # Get the LLVM function and the LLVM IR
    fn = self.cres.get_function(self.name)
    llvm_str = self.cres.get_llvm_str()

    def get_metadata(llvm_str):
        """ Gets the metadata entries from the LLVM IR, these look something
        like '!123 = INFORMATION'. Returns a map of metadata key to metadata
        value, i.e. from the example {'!123': INFORMATION}"""
        md = {}
        metadata_entry = re.compile(r'(^[!][0-9]+)(\s+=\s+.*)')
        for x in llvm_str.splitlines():
            match = metadata_entry.match(x)
            if match is not None:
                g = match.groups()
                if g is not None:
                    assert len(g) == 2
                    md[g[0]] = g[1]
        return md

    md = get_metadata(llvm_str)

    # setup digraph with initial properties
    def init_digraph(name, fname, fontsize):
        # name and fname are arbitrary graph and file names, they appear in
        # some rendering formats, the fontsize determines the output
        # fontsize.
        f = gv.Digraph(name, fname)
        f.attr(rankdir='TB')
        f.attr('node', shape='none', fontsize='%s' % str(fontsize))
        return f

    f = init_digraph(self.name, self.name, fontsize)

    # A lot of regex is needed to parse the raw dot output. This output
    # contains a mix of LLVM IR in the labels, and also DOT markup.

    # DOT syntax, matches a "port" (where the tail of an edge starts)
    port_match = re.compile('.*{(.*)}.*')
    # DOT syntax, matches the "port" value from a found "port_match"
    port_jmp_match = re.compile('.*<(.*)>(.*)')
    # LLVM syntax, matches a LLVM debug marker
    metadata_marker = re.compile(r'.*!dbg\s+(![0-9]+).*')
    # LLVM syntax, matches a location entry. The column group accepts one
    # or more digits so that columns >= 10 are recognised as well.
    location_expr = (r'.*!DILocation\(line:\s+([0-9]+),'
                     r'\s+column:\s+([0-9]+),.*')
    location_entry = re.compile(location_expr)
    # LLVM syntax, matches LLVMs internal debug value calls
    dbg_value = re.compile(r'.*call void @llvm.dbg.value.*')
    # LLVM syntax, matches tokens for highlighting
    nrt_incref = re.compile(r"@NRT_incref\b")
    nrt_decref = re.compile(r"@NRT_decref\b")
    nrt_meminfo = re.compile("@NRT_MemInfo")
    ll_raise = re.compile("ret i32 1,")
    ll_return = re.compile("ret i32 [^1],")

    # wrapper function for line wrapping LLVM lines
    def wrap(s):
        return textwrap.wrap(s, width=120, subsequent_indent='... ')

    # function to fix (sometimes escaped for DOT!) LLVM IR etc that needs to
    # be HTML escaped
    def clean(s):
        s = html.escape(s)  # deals with &, < and >
        # The remaining characters are written as numeric character
        # references. html.escape must run first so that the '&' of these
        # references is not itself escaped.
        s = s.replace('\\{', "&#123;")
        s = s.replace('\\}', "&#125;")
        s = s.replace('\\', "&#92;")
        s = s.replace('%', "&#37;")
        return s.replace('!', "&#33;")

    # These hold the node and edge ids from the raw dot information. They
    # are used later to wire up a new DiGraph that has the same structure
    # as the raw dot but with new nodes.
    node_ids = {}
    edge_ids = {}

    # Python source lines, used if python source interleave is requested
    if _interleave.python:
        src_code, firstlineno = inspect.getsourcelines(self.py_func)

    # This is the dot info from LLVM, it's in DOT form and has continuation
    # lines, strip them and then re-parse into `dot_json` form for use in
    # producing a formatted output.
    raw_dot = ll.get_function_cfg(fn).replace('\\l...', '')
    json_bytes = gv.Source(raw_dot).pipe(format='dot_json')
    jzon = json.loads(json_bytes.decode('utf-8'))

    idc = 0
    # Walk the "objects" (nodes) in the DOT output
    for obj in jzon['objects']:
        # These are used to keep tabs on the current line and column numbers
        # as per the markers. They are tracked so as to make sure a marker
        # is only emitted if there's a change in the marker.
        cur_line, cur_col = -1, -1
        label = obj['label']
        name = obj['name']
        gvid = obj['_gvid']
        node_ids[gvid] = name
        # Label is DOT format, it needs the head and tail removing and then
        # splitting for walking.
        label = label[1:-1]
        lines = label.split('\\l')
        # Holds the new lines
        new_lines = []

        # Aim is to produce an HTML table a bit like this:
        #
        # |------------|
        # |   HEADER   | <-- this is the block header
        # |------------|
        # | LLVM SRC   | <--
        # | Marker?    |   < this is the label/block body
        # | Python src?| <--
        # |------------|
        # |  T  |  F   | <-- this is the "ports", also determines col_span
        # --------------
        #
        # This is HTML syntax, its the column span. If there's a switch or a
        # branch at the bottom of the node this is rendered as multiple
        # columns in a table. First job is to go and render that and work
        # out how many columns are needed as that dictates how many columns
        # the rest of the source lines must span. In DOT syntax the places
        # that edges join nodes are referred to as "ports". Syntax in DOT
        # is like `node:port`.
        col_span = 1

        # First see if there is a port entry for this node
        port_line = ''
        matched = port_match.match(lines[-1])
        sliced_lines = lines
        if matched is not None:
            # There is a port
            ports = matched.groups()[0]
            ports_tokens = ports.split('|')
            col_span = len(ports_tokens)
            # Generate HTML table data cells, one for each port. If the
            # ports correspond to a branch then they can optionally
            # highlighted based on T/F.
            tdfmt = ('<td BGCOLOR="{}" BORDER="1" ALIGN="center" '
                     'PORT="{}">{}</td>')
            tbl_data = []
            if _highlight.branches:
                colors = {'T': cs['truebr'], 'F': cs['falsebr']}
            else:
                colors = {}
            for tok in ports_tokens:
                target, value = port_jmp_match.match(tok).groups()
                color = colors.get(value, 'white')
                tbl_data.append(tdfmt.format(color, target, value))
            port_line = ''.join(tbl_data)
            # Drop the last line from the rest of the parse as it's the port
            # and just been dealt with.
            sliced_lines = lines[:-1]

        # loop peel the block header, it needs a HTML border
        fmtheader = ('<tr><td BGCOLOR="{}" BORDER="1" ALIGN="left" '
                     'COLSPAN="{}">{}</td></tr>')
        new_lines.append(
            fmtheader.format(cs['default'], col_span,
                             clean(sliced_lines[0].strip())))

        # process rest of block creating the table row at a time.
        fmt = ('<tr><td BGCOLOR="{}" BORDER="0" ALIGN="left" '
               'COLSPAN="{}">{}</td></tr>')

        def metadata_interleave(l, new_lines):
            """
            Search line `l` for metadata associated with python or line info
            and inject it into `new_lines` if requested.

            Returns the ``(line, col)`` pair when a new marker was found,
            otherwise None.
            """
            matched = metadata_marker.match(l)
            if matched is not None:
                # there's a metadata marker
                g = matched.groups()
                if g is not None:
                    assert len(g) == 1, g
                    marker = g[0]
                    debug_data = md.get(marker, None)
                    if debug_data is not None:
                        # and the metadata marker has a corresponding piece
                        # of metadata
                        ld = location_entry.match(debug_data)
                        if ld is not None:
                            # and the metadata is line info... proceed
                            assert len(ld.groups()) == 2, ld
                            line, col = ld.groups()
                            # only emit a new marker if the line number in
                            # the metadata is "new".
                            if line != cur_line or col != cur_col:
                                if _interleave.lineinfo:
                                    mfmt = 'Marker %s, Line %s, column %s'
                                    mark_line = mfmt % (marker, line, col)
                                    ln = fmt.format(
                                        cs['marker'], col_span,
                                        clean(mark_line))
                                    new_lines.append(ln)
                                if _interleave.python:
                                    # TODO:
                                    # +1 for decorator, this probably needs
                                    # the same thing doing as for the
                                    # error messages where the decorator
                                    # is scanned for, its not always +1!
                                    lidx = int(line) - (firstlineno + 1)
                                    source_line = src_code[lidx + 1]
                                    ln = fmt.format(
                                        cs['python'], col_span,
                                        clean(source_line))
                                    new_lines.append(ln)
                                return line, col

        for l in sliced_lines[1:]:
            # Drop LLVM debug call entries
            if dbg_value.match(l):
                continue

            # if requested generate interleaving of markers or python from
            # metadata
            if _interleave.lineinfo or _interleave.python:
                updated_lineinfo = metadata_interleave(l, new_lines)
                if updated_lineinfo is not None:
                    cur_line, cur_col = updated_lineinfo

            # Highlight other LLVM features if requested, HTML BGCOLOR
            # property is set by this.
            if _highlight.incref and nrt_incref.search(l):
                colour = cs['incref']
            elif _highlight.decref and nrt_decref.search(l):
                colour = cs['decref']
            elif _highlight.meminfo and nrt_meminfo.search(l):
                colour = cs['meminfo']
            elif _highlight.raises and ll_raise.search(l):
                # search for raise as its more specific than exit
                colour = cs['raise']
            elif _highlight.returns and ll_return.search(l):
                colour = cs['return']
            else:
                colour = cs['default']

            # Use the default coloring as a flag to force printing if a
            # special token print was requested AND LLVM ir stripping is
            # required
            if colour is not cs['default'] or not strip_ir:
                for x in wrap(clean(l)):
                    new_lines.append(fmt.format(colour, col_span, x))

        # add in the port line at the end of the block if it was present
        # (this was built right at the top of the parse)
        if port_line:
            new_lines.append('<tr>{}</tr>'.format(port_line))

        # If there was data, create a table, else don't!
        dat = ''.join(new_lines)
        if dat:
            tab = (('<table id="%s" BORDER="1" CELLBORDER="0" '
                    'CELLPADDING="0" CELLSPACING="0">%s</table>')
                   % (idc, dat))
            label = '<{}>'.format(tab)
        else:
            label = ''

        # finally, add a replacement node for the original with a new marked
        # up label.
        f.node(name, label=label)

    # Parse the edge data
    if 'edges' in jzon:  # might be a single block, no edges
        for edge in jzon['edges']:
            gvid = edge['_gvid']
            tp = edge.get('tailport', None)
            edge_ids[gvid] = (edge['head'], edge['tail'], tp)

    # Write in the edge wiring with respect to the new nodes:ports.
    for gvid, edge in edge_ids.items():
        tail = node_ids[edge[1]]
        head = node_ids[edge[0]]
        port = edge[2]
        if port is not None:
            tail += ':%s' % port
        f.edge(tail, head)

    # Add a key to the graph if requested.
    if show_key:
        key_tab = []
        for k, v in cs.items():
            key_tab.append(
                ('<tr><td BGCOLOR="{}" BORDER="0" ALIGN="center"'
                 '>{}</td></tr>').format(v, k))
        # The first < and last > are DOT syntax, rest is DOT HTML.
        f.node("Key", label=('<<table BORDER="1" CELLBORDER="1" '
                             'CELLPADDING="2" CELLSPACING="1"><tr><td BORDER="0">'
                             'Key:</td></tr>{}</table>>').format(
                                 ''.join(key_tab)))

    # Render if required
    if filename is not None or view is not None:
        f.render(filename=filename, view=view, format='pdf')

    # Else pipe out a SVG
    return f.pipe(format='svg')
import llvmlite.binding as llvm
import pygraphviz
import networkx

# Bitcode is binary data, so read it as bytes; the context manager closes
# the file as soon as its contents have been handed to the parser.
with open("curl-7.54.0/src/.libs/curl.0.4.opt.bc", 'rb') as bitcode_file:
    mod = llvm.parse_bitcode(bitcode_file.read())
mod.verify()

# Concatenate the DOT text of every function's CFG into one string.
graph = "".join(llvm.get_function_cfg(func) for func in mod.functions)

# Parse the DOT text, convert it to a networkx graph and draw it.
graph2 = pygraphviz.AGraph(graph)
graph3 = networkx.Graph(graph2)
networkx.draw(graph3)