Example #1
0
 def test_get_function_cfg_on_ir(self):
     """CFG text for the IR function ``foo`` lists instructions only on request."""
     function = self.build_ir_module().get_global('foo')
     # Render both variants up front, then compare them against one
     # instruction that is expected in the function body.
     with_inst = llvm.get_function_cfg(function)
     without_inst = llvm.get_function_cfg(function, show_inst=False)
     add_inst = "%.5 = add i32 %.1, %.2"
     self.assertIn(add_inst, with_inst)
     self.assertNotIn(add_inst, without_inst)
Example #2
0
 def test_function_cfg_on_llvm_value(self):
     """Check the DOT text produced for ``sum`` with and without instructions."""
     function = self.module().get_function('sum')
     with_inst = llvm.get_function_cfg(function, show_inst=True)
     without_inst = llvm.get_function_cfg(function, show_inst=False)

     # Both renderings must contain the "digraph" keyword and the graph
     # title naming the function.
     for expected in ('digraph', "CFG for 'sum' function"):
         for dot in (with_inst, without_inst):
             self.assertIn(expected, dot)

     # Only the instruction-showing rendering carries the add instruction.
     add_inst = "%.3 = add i32 %.1, %.2"
     self.assertIn(add_inst, with_inst)
     self.assertNotIn(add_inst, without_inst)
Example #3
0
 def __init__(self, cres, name, py_func, **kwargs):
     """Store the inputs and eagerly fetch the CFG DOT text for ``name``.

     ``cres`` must provide ``get_function(name)``; the value it returns is
     handed to ``ll.get_function_cfg`` and the result is kept in
     ``self.dot``. Extra keyword arguments are stored untouched in
     ``self.kwargs``.
     """
     self.cres, self.name, self.py_func = cres, name, py_func
     self.dot = ll.get_function_cfg(cres.get_function(name))
     self.kwargs = kwargs
Example #4
0
 def get_function_cfg(self, name):
     """
     Return the control-flow graph of the LLVM function called *name*,
     wrapped in a ``_CFG`` object.
     """
     function = self.get_function(name)
     return _CFG(ll.get_function_cfg(function))
Example #5
0
def execute(ir_mod):
    """Optimize, JIT-compile and run ``ir_mod``, then display a CFG.

    The module is converted to text with ``str`` and parsed by llvmlite,
    optimized at level 1 and printed. The symbol ``entry_fib`` is then
    called through ctypes for the inputs 0..11, and finally the CFG of
    ``fib_more`` is passed to ``llvm.view_dot_graph`` with ``view=True``.
    """
    from ctypes import CFUNCTYPE, c_int

    llvm.initialize()
    llvm.initialize_native_target()
    llvm.initialize_native_asmprinter()

    llmod = llvm.parse_assembly(str(ir_mod))

    # Run the optimization pipeline and show the resulting IR.
    print('optimized'.center(80, '-'))
    builder = llvm.create_pass_manager_builder()
    builder.opt_level = 1
    passes = llvm.create_module_pass_manager()
    builder.populate(passes)
    passes.run(llmod)
    print(llmod)

    machine = llvm.Target.from_default_triple().create_target_machine()

    with llvm.create_mcjit_compiler(llmod, machine) as engine:
        engine.finalize_object()
        address = engine.get_function_address("entry_fib")
        fib = CFUNCTYPE(c_int, c_int)(address)

        # Exercise the compiled entry point.
        for n in range(12):
            print('fib({}) = {}'.format(n, fib(n)))

        # Display the CFG of the ``fib_more`` function.
        fib_more = llmod.get_function('fib_more')
        llvm.view_dot_graph(llvm.get_function_cfg(fib_more), view=True)
Example #6
0
 def get_function_cfg(self, name):
     """
     Return the control-flow graph of the LLVM function called *name*,
     wrapped in a ``_CFG`` object.

     ``_sentry_cache_disable_inspection`` is called first, before the
     function is looked up.
     """
     self._sentry_cache_disable_inspection()
     function = self.get_function(name)
     return _CFG(ll.get_function_cfg(function))
Example #7
0
 def get_function_cfg(self, name):
     """
     Build a ``_CFG`` from the DOT description of the LLVM function *name*.

     The inspection sentry runs before anything else, then the function is
     looked up and its control-flow graph requested from llvmlite.
     """
     self._sentry_cache_disable_inspection()
     dot_text = ll.get_function_cfg(self.get_function(name))
     return _CFG(dot_text)
def graph(module):
    """Render the CFG of every function in *module* as a PNG.

    *module* is converted to text with ``str`` and parsed by llvmlite.
    Each function's CFG is rendered with ``format='png'`` into the
    ``graphs`` directory. Returns the list of values returned by the
    ``render`` calls, in function order.
    """
    parsed = llvm.parse_assembly(str(module))
    rendered = []
    for func in parsed.functions:
        dot_source = llvm.view_dot_graph(
            llvm.get_function_cfg(func), view=False)
        rendered.append(dot_source.render(format='png', directory="graphs"))
    return rendered
Example #9
0
 def __get_cfg_all(self, path):
     """Build one networkx graph holding the CFG of every function.

     ``path`` names a file containing textual LLVM IR. Each function's CFG
     (rendered with instructions) is parsed with pydot, converted to a
     networkx graph, and all of them are merged with ``nx.compose_all``.

     Returns the composed networkx graph.
     """
     # The context manager closes the file even if reading raises.
     with open(path, "r") as ir_file:
         llvm_ir = ir_file.read()
     mod = llvm.parse_assembly(llvm_ir)
     graphs = []
     for function in mod.functions:
         cfg = llvm.get_function_cfg(function, show_inst=True)
         # graph_from_dot_data returns a sequence; only its first graph
         # is used.
         parsed = pydot.graph_from_dot_data(cfg)
         graphs.append(nx.nx_pydot.from_pydot(parsed[0]))
     return nx.compose_all(graphs)
Example #10
0
 def __get_cfg_main(self, path):
     """Return the CFG of the ``main`` function as a networkx graph.

     ``path`` names a file containing textual LLVM IR. The CFG of ``main``
     (rendered with instructions) is parsed with pydot and converted to a
     networkx graph.

     Raises ``NameError`` when the module has no ``main`` function (as
     documented for llvmlite's ``ModuleRef.get_function``). Previously this
     case surfaced as an ``UnboundLocalError``, which is a subclass of
     ``NameError``.
     """
     # The context manager closes the file even if reading raises.
     with open(path, "r") as ir_file:
         llvm_ir = ir_file.read()
     mod = llvm.parse_assembly(llvm_ir)
     # Direct lookup instead of scanning every function and relying on a
     # variable that is only bound when a match is found.
     main_function = mod.get_function("main")
     cfg = llvm.get_function_cfg(main_function, show_inst=True)
     # graph_from_dot_data returns a sequence; only its first graph is used.
     parsed = pydot.graph_from_dot_data(cfg)
     return nx.nx_pydot.from_pydot(parsed[0])
Example #11
0
    def exitProgram(self, ctx):
        """Generate, optionally optimize, and emit code for the whole program.

        Python 2 code (it relies on ``raw_input``). Every ``print`` uses
        the parenthesised single-argument form, which prints the same text
        as the print statement.

        Steps: collect the AST of each child of ``ctx`` from ``self.prop``,
        generate the IR module and write it to ``output.ll``, optionally
        run the -O3 pipeline and write ``output_opt.ll``, verify the
        module, emit assembly for ``self.tm`` to stdout and ``output.asm``,
        and optionally display the CFG of every entry in the returned
        ``cfg_list``.
        """
        print("* Target cpu: " + llvm.get_host_cpu_name())
        programAst = ProgramAST()
        for child in ctx.getChildren():
            programAst.asts.append(self.prop[child])
        mod, cfg_list = programAst.codeGenerate(self.var_ptr_symbolTBL)
        strmod = str(mod)
        print("=== Generated IR code ===\n")
        print(strmod)
        with open("output.ll", 'w') as ir_file:
            ir_file.write(strmod)

        llmod = llvm.parse_assembly(strmod)
        opt = raw_input('* Optimizing this code? (y/n): ').lower() == "y"

        if opt:
            pm = llvm.create_module_pass_manager()
            pmb = llvm.create_pass_manager_builder()
            pmb.opt_level = 3  # -O3
            pmb.populate(pm)
            # optimize
            pm.run(llmod)
            print("=== Generated optimized IR code ===\n")
            print(llmod)
            with open("output_opt.ll", 'w') as opt_file:
                opt_file.write(str(llmod))

        llmod.verify()
        with llvm.create_mcjit_compiler(llmod, self.tm) as ee:
            ee.finalize_object()
            print("=== Generated assembly code ===\n")
            print(self.tm.emit_assembly(llmod))
            with open("output.asm", 'w') as asm_file:
                asm_file.write(self.tm.emit_assembly(llmod))

        want_cfg = raw_input('Do you want to create CFG Graph? (y/n) : ')
        if want_cfg.lower() == 'y':
            for cfg in cfg_list:
                dot = llvm.get_function_cfg(cfg)
                llvm.view_dot_graph(dot, filename=cfg.name, view=True)
Example #12
0
    def exitProgram(self, ctx):
        """Drive code generation for the program once parsing has finished.

        Python 2 code (it relies on ``raw_input``). Every ``print`` uses
        the parenthesised single-argument form, which prints the same text
        as the print statement.

        The child ASTs recorded in ``self.prop`` are gathered into a
        ``ProgramAST`` and the generated IR is printed and saved to
        ``output.ll``. On request the module is optimized at level 3 and
        saved to ``output_opt.ll``. After verification the assembly for
        ``self.tm`` is printed and saved to ``output.asm``, and on request
        the CFG of each entry in ``cfg_list`` is displayed.
        """
        print("* Target cpu: " + llvm.get_host_cpu_name())
        programAst = ProgramAST()
        for child in ctx.getChildren():
            programAst.asts.append(self.prop[child])
        mod, cfg_list = programAst.codeGenerate(self.var_ptr_symbolTBL)
        strmod = str(mod)
        print("=== Generated IR code ===\n")
        print(strmod)
        with open("output.ll", 'w') as out:
            out.write(strmod)

        llmod = llvm.parse_assembly(strmod)
        reply = raw_input('* Optimizing this code? (y/n): ')
        opt = reply.lower() == "y"

        if opt:
            pm = llvm.create_module_pass_manager()
            pmb = llvm.create_pass_manager_builder()
            pmb.opt_level = 3  # -O3
            pmb.populate(pm)
            # Run the optimization pipeline in place.
            pm.run(llmod)
            print("=== Generated optimized IR code ===\n")
            print(llmod)
            with open("output_opt.ll", 'w') as out:
                out.write(str(llmod))

        llmod.verify()
        with llvm.create_mcjit_compiler(llmod, self.tm) as ee:
            ee.finalize_object()
            print("=== Generated assembly code ===\n")
            print(self.tm.emit_assembly(llmod))
            with open("output.asm", 'w') as out:
                out.write(self.tm.emit_assembly(llmod))

        reply = raw_input('Do you want to create CFG Graph? (y/n) : ')
        if reply.lower() == 'y':
            for cfg in cfg_list:
                llvm.view_dot_graph(llvm.get_function_cfg(cfg),
                                    filename=cfg.name, view=True)
Example #13
0
def set_blocknames(kfunction):
    """Name the blocks of ``kfunction`` after the labels in its CFG.

    The CFG of ``kfunction.valueref`` is parsed from DOT, written as a PNG
    named after ``kfunction.functionname``, and for every node that has a
    ``label`` attribute the first ``%<digits>`` token of that label is
    recorded as ``"label %<digits>"``. The recorded names are then
    assigned, by position, to ``kfunction.blocks``.
    """
    dot_text = llvm.get_function_cfg(kfunction.valueref)
    dot_graph = graph_from_dot_data(dot_text)[0]
    dot_graph.write_png("/libx32/llvmlite/" + kfunction.functionname + ".png")

    blocknames = []
    for node in dot_graph.get_nodes():
        attributes = node.get_attributes()
        if "label" in attributes:
            first_id = re.findall(r"%\d+", attributes["label"])[0]
            blocknames.append("label " + first_id)

    # Positional pairing: the i-th recorded name goes to the i-th block.
    for index, block in enumerate(kfunction.blocks):
        block.name = blocknames[index]
Example #14
0
def main(bv: BinaryView):
    """Lift ``target`` to LLVM IR, dump its optimized CFG, then run it.

    The function at the address of the first symbol named ``target`` is
    lifted into a fresh IR module. The module is optimized at level 3 and
    the CFG of the lifted function is written to ``output.opt.dot``. The
    optimized module is then compiled and the symbol ``target_0`` is
    called with the argument 10; the result is printed.
    """
    # Lift the `target` function to IR
    module = ir.Module(name=__file__)
    f: Function = bv.get_function_at(
        bv.get_symbols_by_name('target')[0].address)
    lifter = FunctionLifter(module, f)
    lifter.run()

    # Output the optimized IR to a CFG
    opt_module: llvm.ModuleRef = lifter.optimize(3)
    opt_target = opt_module.get_function(f.name)
    dot = llvm.get_function_cfg(opt_target)
    # Close the file deterministically instead of leaving it to the
    # garbage collector.
    with open('output.opt.dot', 'w') as dot_file:
        dot_file.write(dot)

    # Execute the LLVM IR
    engine = create_execution_engine()
    # NOTE(review): the result is not used below; the binding is kept in
    # case the compiled module must stay referenced -- confirm.
    mod = compile_ir(engine, opt_module)
    func_ptr = engine.get_function_address("target_0")
    cfunc = CFUNCTYPE(c_int64, c_int)(func_ptr)
    print(cfunc(10))
Example #15
0
    def pretty_printer(self,
                       filename=None,
                       view=None,
                       highlight=True,
                       interleave=False,
                       strip_ir=False,
                       show_key=True,
                       fontsize=10):
        """
        "Pretty" prints the DOT graph of the CFG.
        For explanation of the parameters see the docstring for
        numba.core.dispatcher::inspect_cfg.

        Summary of what this implementation does with each parameter:

        filename, view
            If either is not None the graph is rendered as a PDF via
            ``render(filename=filename, view=view, format='pdf')``.
        highlight
            Selects which features get a background colour. Accepts a bool
            (applies to all of ``incref``, ``decref``, ``returns``,
            ``raises``, ``meminfo``, ``branches``), a dict mapping those
            names to bools, or a set of names to switch on.
        interleave
            Same formats as ``highlight``; the names are ``python`` and
            ``lineinfo``.
        strip_ir
            If true, only lines that received a highlight colour are kept
            in the block bodies.
        show_key
            If true, a "Key" node listing the colours is added.
        fontsize
            Font size applied to the graph nodes.

        Returns the result of piping the graph out in SVG format.

        Raises ValueError if ``highlight`` or ``interleave`` is of an
        unsupported type, names an unknown key, or (dict form) carries a
        non-bool value.
        """
        import graphviz as gv
        import re
        import json
        import inspect
        from llvmlite import binding as ll
        from numba.typed import List
        from types import SimpleNamespace
        from collections import defaultdict

        _default = False
        _highlight = SimpleNamespace(incref=_default,
                                     decref=_default,
                                     returns=_default,
                                     raises=_default,
                                     meminfo=_default,
                                     branches=_default)
        _interleave = SimpleNamespace(python=_default, lineinfo=_default)

        def parse_config(_config, kwarg):
            """ Parses the kwarg into a consistent format for use in configuring
            the Digraph rendering. _config is the configuration instance to
            update, kwarg is the kwarg on which to base the updates.
            """
            if isinstance(kwarg, bool):
                for attr in _config.__dict__:
                    setattr(_config, attr, kwarg)
            elif isinstance(kwarg, dict):
                for k, v in kwarg.items():
                    if k not in _config.__dict__:
                        raise ValueError("Unexpected key in kwarg: %s" % k)
                    if isinstance(v, bool):
                        setattr(_config, k, v)
                    else:
                        msg = "Unexpected value for key: %s, got:%s"
                        raise ValueError(msg % (k, v))
            elif isinstance(kwarg, set):
                for item in kwarg:
                    if item not in _config.__dict__:
                        raise ValueError("Unexpected key in kwarg: %s" % item)
                    else:
                        setattr(_config, item, True)
            else:
                msg = "Unhandled configuration type for kwarg %s"
                raise ValueError(msg % type(kwarg))

        parse_config(_highlight, highlight)
        parse_config(_interleave, interleave)

        # This is the colour scheme. The graphviz HTML label renderer only takes
        # names for colours: https://www.graphviz.org/doc/info/shapes.html#html
        cs = defaultdict(lambda: 'white')  # default bg colour is white
        cs['marker'] = 'orange'
        cs['python'] = 'yellow'
        cs['truebr'] = 'green'
        cs['falsebr'] = 'red'
        cs['incref'] = 'cyan'
        cs['decref'] = 'turquoise'
        cs['raise'] = 'lightpink'
        cs['meminfo'] = 'lightseagreen'
        cs['return'] = 'purple'

        # Get the function handle and the LLVM IR text; the raw dot output
        # is fetched further down, just before it is parsed.
        fn = self.cres.get_function(self.name)
        llvm_str = self.cres.get_llvm_str()

        def get_metadata(llvm_str):
            """ Gets the metadata entries from the LLVM IR, these look something
            like '!123 = INFORMATION'. Returns a map of metadata key to metadata
            value, i.e. from the example {'!123': INFORMATION}"""
            md = {}
            metadata_entry = re.compile(r'(^[!][0-9]+)(\s+=\s+.*)')
            for x in llvm_str.splitlines():
                match = metadata_entry.match(x)
                if match is not None:
                    g = match.groups()
                    if g is not None:
                        assert len(g) == 2
                        md[g[0]] = g[1]
            return md

        md = get_metadata(llvm_str)

        # setup digraph with initial properties
        def init_digraph(name, fname, fontsize):
            # name and fname are arbitrary graph and file names, they appear in
            # some rendering formats, the fontsize determines the output
            # fontsize.
            f = gv.Digraph(name, fname)
            f.attr(rankdir='TB')
            f.attr('node', shape='none', fontsize='%s' % str(fontsize))
            return f

        f = init_digraph(self.name, self.name, fontsize)

        # A lot of regex is needed to parse the raw dot output. This output
        # contains a mix of LLVM IR in the labels, and also DOT markup.

        # DOT syntax, matches a "port" (where the tail of an edge starts)
        port_match = re.compile('.*{(.*)}.*')
        # DOT syntax, matches the "port" value from a found "port_match"
        port_jmp_match = re.compile('.*<(.*)>(.*)')
        # LLVM syntax, matches a LLVM debug marker
        metadata_marker = re.compile(r'.*!dbg\s+(![0-9]+).*')
        # LLVM syntax, matches a location entry. The column group accepts
        # one or more digits so that columns >= 10 are matched as well.
        location_expr = (r'.*!DILocation\(line:\s+([0-9]+),'
                         r'\s+column:\s+([0-9]+),.*')
        location_entry = re.compile(location_expr)
        # LLVM syntax, matches LLVMs internal debug value calls
        dbg_value = re.compile(r'.*call void @llvm.dbg.value.*')
        # LLVM syntax, matches tokens for highlighting
        nrt_incref = re.compile(r"@NRT_incref\b")
        nrt_decref = re.compile(r"@NRT_decref\b")
        nrt_meminfo = re.compile("@NRT_MemInfo")
        ll_raise = re.compile("ret i32 1,")
        ll_return = re.compile("ret i32 [^1],")

        # wrapper function for line wrapping LLVM lines
        def wrap(s):
            return textwrap.wrap(s, width=120, subsequent_indent='... ')

        # function to fix (sometimes escaped for DOT!) LLVM IR etc that needs to
        # be HTML escaped
        def clean(s):
            s = html.escape(s)  # deals with  &, < and >
            s = s.replace('\\{', "&#123;")
            s = s.replace('\\}', "&#125;")
            s = s.replace('\\', "&#92;")
            s = s.replace('%', "&#37;")
            return s.replace('!', "&#33;")

        # These hold the node and edge ids from the raw dot information. They
        # are used later to wire up a new DiGraph that has the same structure
        # as the raw dot but with new nodes.
        node_ids = {}
        edge_ids = {}

        # Python source lines, used if python source interleave is requested
        if _interleave.python:
            src_code, firstlineno = inspect.getsourcelines(self.py_func)

        # This is the dot info from LLVM, it's in DOT form and has continuation
        # lines, strip them and then re-parse into `dot_json` form for use in
        # producing a formatted output.
        raw_dot = ll.get_function_cfg(fn).replace('\\l...', '')
        json_bytes = gv.Source(raw_dot).pipe(format='dot_json')
        jzon = json.loads(json_bytes.decode('utf-8'))

        # NOTE(review): idc is never incremented, so every table below is
        # emitted with id="0" -- confirm whether that is intended.
        idc = 0
        # Walk the "objects" (nodes) in the DOT output
        for obj in jzon['objects']:
            # These are used to keep tabs on the current line and column numbers
            # as per the markers. They are tracked so as to make sure a marker
            # is only emitted if there's a change in the marker.
            cur_line, cur_col = -1, -1
            label = obj['label']
            name = obj['name']
            gvid = obj['_gvid']
            node_ids[gvid] = name
            # Label is DOT format, it needs the head and tail removing and then
            # splitting for walking.
            label = label[1:-1]
            lines = label.split('\\l')

            # Holds the new lines
            new_lines = []

            # Aim is to produce an HTML table a bit like this:
            #
            # |------------|
            # | HEADER     | <-- this is the block header
            # |------------|
            # | LLVM SRC   | <--
            # | Marker?    | < this is the label/block body
            # | Python src?| <--
            # |------------|
            # | T   |  F   |  <-- this is the "ports", also determines col_span
            # --------------
            #

            # This is HTML syntax, its the column span. If there's a switch or a
            # branch at the bottom of the node this is rendered as multiple
            # columns in a table. First job is to go and render that and work
            # out how many columns are needed as that dictates how many columns
            # the rest of the source lines must span. In DOT syntax the places
            # that edges join nodes are referred to as "ports". Syntax in DOT
            # is like `node:port`.
            col_span = 1

            # First see if there is a port entry for this node
            port_line = ''
            matched = port_match.match(lines[-1])
            sliced_lines = lines
            if matched is not None:
                # There is a port
                ports = matched.groups()[0]
                ports_tokens = ports.split('|')
                col_span = len(ports_tokens)
                # Generate HTML table data cells, one for each port. If the
                # ports correspond to a branch then they can optionally
                # highlighted based on T/F.
                tdfmt = ('<td BGCOLOR="{}" BORDER="1" ALIGN="center" '
                         'PORT="{}">{}</td>')
                tbl_data = []
                if _highlight.branches:
                    colors = {'T': cs['truebr'], 'F': cs['falsebr']}
                else:
                    colors = {}
                for tok in ports_tokens:
                    target, value = port_jmp_match.match(tok).groups()
                    color = colors.get(value, 'white')
                    tbl_data.append(tdfmt.format(color, target, value))
                port_line = ''.join(tbl_data)
                # Drop the last line from the rest of the parse as it's the port
                # and just been dealt with.
                sliced_lines = lines[:-1]

            # loop peel the block header, it needs a HTML border
            fmtheader = ('<tr><td BGCOLOR="{}" BORDER="1" ALIGN="left" '
                         'COLSPAN="{}">{}</td></tr>')
            new_lines.append(
                fmtheader.format(cs['default'], col_span,
                                 clean(sliced_lines[0].strip())))

            # process rest of block creating the table row at a time.
            fmt = ('<tr><td BGCOLOR="{}" BORDER="0" ALIGN="left" '
                   'COLSPAN="{}">{}</td></tr>')

            def metadata_interleave(l, new_lines):
                """
                Search line `l` for metadata associated with python or line info
                and inject it into `new_lines` if requested.

                Returns the `(line, col)` pair from the metadata when it
                differs from the currently tracked pair, otherwise None.
                """
                matched = metadata_marker.match(l)
                if matched is not None:
                    # there's a metadata marker
                    g = matched.groups()
                    if g is not None:
                        assert len(g) == 1, g
                        marker = g[0]
                        debug_data = md.get(marker, None)
                        if debug_data is not None:
                            # and the metadata marker has a corresponding piece
                            # of metadata
                            ld = location_entry.match(debug_data)
                            if ld is not None:
                                # and the metadata is line info... proceed
                                assert len(ld.groups()) == 2, ld
                                line, col = ld.groups()
                                # only emit a new marker if the line number in
                                # the metadata is "new".
                                if line != cur_line or col != cur_col:
                                    if _interleave.lineinfo:
                                        mfmt = 'Marker %s, Line %s, column %s'
                                        mark_line = mfmt % (marker, line, col)
                                        ln = fmt.format(
                                            cs['marker'], col_span,
                                            clean(mark_line))
                                        new_lines.append(ln)
                                    if _interleave.python:
                                        # TODO:
                                        # +1 for decorator, this probably needs
                                        # the same thing doing as for the
                                        # error messages where the decorator
                                        # is scanned for, its not always +1!
                                        lidx = int(line) - (firstlineno + 1)
                                        source_line = src_code[lidx + 1]
                                        ln = fmt.format(
                                            cs['python'], col_span,
                                            clean(source_line))
                                        new_lines.append(ln)
                                    return line, col

            for l in sliced_lines[1:]:

                # Drop LLVM debug call entries
                if dbg_value.match(l):
                    continue

                # if requested generate interleaving of markers or python from
                # metadata
                if _interleave.lineinfo or _interleave.python:
                    updated_lineinfo = metadata_interleave(l, new_lines)
                    if updated_lineinfo is not None:
                        cur_line, cur_col = updated_lineinfo

                # Highlight other LLVM features if requested, HTML BGCOLOR
                # property is set by this.
                if _highlight.incref and nrt_incref.search(l):
                    colour = cs['incref']
                elif _highlight.decref and nrt_decref.search(l):
                    colour = cs['decref']
                elif _highlight.meminfo and nrt_meminfo.search(l):
                    colour = cs['meminfo']
                elif _highlight.raises and ll_raise.search(l):
                    # search for raise as its more specific than exit
                    colour = cs['raise']
                elif _highlight.returns and ll_return.search(l):
                    colour = cs['return']
                else:
                    colour = cs['default']

                # Use the default coloring as a flag to force printing if a
                # special token print was requested AND LLVM ir stripping is
                # required. Compared by value: string identity is an
                # implementation detail and should not be relied upon.
                if colour != cs['default'] or not strip_ir:
                    for x in wrap(clean(l)):
                        new_lines.append(fmt.format(colour, col_span, x))

            # add in the port line at the end of the block if it was present
            # (this was built right at the top of the parse)
            if port_line:
                new_lines.append('<tr>{}</tr>'.format(port_line))

            # If there was data, create a table, else don't!
            dat = ''.join(new_lines)
            if dat:
                tab = (('<table id="%s" BORDER="1" CELLBORDER="0" '
                        'CELLPADDING="0" CELLSPACING="0">%s</table>') %
                       (idc, dat))
                label = '<{}>'.format(tab)
            else:
                label = ''

            # finally, add a replacement node for the original with a new marked
            # up label.
            f.node(name, label=label)

        # Parse the edge data
        if 'edges' in jzon:  # might be a single block, no edges
            for edge in jzon['edges']:
                gvid = edge['_gvid']
                tp = edge.get('tailport', None)
                edge_ids[gvid] = (edge['head'], edge['tail'], tp)

        # Write in the edge wiring with respect to the new nodes:ports.
        for gvid, edge in edge_ids.items():
            tail = node_ids[edge[1]]
            head = node_ids[edge[0]]
            port = edge[2]
            if port is not None:
                tail += ':%s' % port
            f.edge(tail, head)

        # Add a key to the graph if requested.
        if show_key:
            key_tab = []
            for k, v in cs.items():
                key_tab.append(
                    ('<tr><td BGCOLOR="{}" BORDER="0" ALIGN="center"'
                     '>{}</td></tr>').format(v, k))
            # The first < and last > are DOT syntax, rest is DOT HTML.
            f.node("Key",
                   label=('<<table BORDER="1" CELLBORDER="1" '
                          'CELLPADDING="2" CELLSPACING="1"><tr><td BORDER="0">'
                          'Key:</td></tr>{}</table>>').format(
                              ''.join(key_tab)))

        # Render if required
        if filename is not None or view is not None:
            f.render(filename=filename, view=view, format='pdf')

        # Else pipe out a SVG
        return f.pipe(format='svg')
Example #16
0
import llvmlite.binding as llvm
import pygraphviz
import networkx

# Bitcode is binary data, so the file is opened in 'rb' mode; the context
# manager closes it as soon as the contents have been read.
with open("curl-7.54.0/src/.libs/curl.0.4.opt.bc", 'rb') as bitcode_file:
    mod = llvm.parse_bitcode(bitcode_file.read())
mod.verify()

# Each function's CFG is a separate DOT digraph. They are parsed one at a
# time and merged afterwards: concatenating the DOT texts and parsing the
# result once would presumably keep only the first digraph.
function_graphs = [
    networkx.Graph(pygraphviz.AGraph(llvm.get_function_cfg(func)))
    for func in mod.functions
]
# compose_all rejects an empty list, so fall back to an empty graph.
if function_graphs:
    combined_cfg = networkx.compose_all(function_graphs)
else:
    combined_cfg = networkx.Graph()
networkx.draw(combined_cfg)