Example #1
def do_render_flows(bFlow, sFlow, attrs=None):
    """render a pair"""
    bAttrs = attrs if attrs is not None else dict(attrs=bFlow.attrKeys)
    sAttrs = attrs if attrs is not None else dict(attrs=sFlow.attrKeys)
    render.render_graph(bFlow.digraph,
                        dir=tempfile.tempdir,
                        name=bFlow.name,
                        prefix="bin",
                        attrs=bAttrs,
                        interactive=True)
    render.render_graph(sFlow.digraph,
                        dir=tempfile.tempdir,
                        name=sFlow.name,
                        prefix="src",
                        attrs=sAttrs,
                        interactive=True)
    def _map_subgraph(self, input_map, btfg, stfg):
        assert isinstance(input_map, GraphMap)
        assert isinstance(self.bFlow, cf.BinaryControlFlow)
        assert isinstance(self.sFlow, cf.SourceControlFlow)
        assert isinstance(btfg, HierarchicalFlowGraph)
        assert isinstance(stfg, HierarchicalFlowGraph)
        # --
        btfg_flow = btfg.flow
        stfg_flow = stfg.flow
        assert isinstance(btfg_flow, TransformedFlowGraph)
        assert isinstance(stfg_flow, TransformedFlowGraph)
        # --
        unmapped_nodes = input_map.unmapped()
        mapped_nodes = input_map.mapped()
        log.debug("Unmapped binary nodes: {}".format(list(unmapped_nodes)))

        def Union(a, b):
            # Move elements of a to b
            node_sets[b] = node_sets[b].union(node_sets[a])
            for n in node_sets[a]:
                node_lookup[n] = b

            node_sets[a] = set()

        def Find(el):
            assert el in node_lookup
            return node_lookup[el]
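
        # A minimal standalone sketch (illustrative names, not part of the
        # algorithm) of the set-merging union-find idiom used by Union/Find
        # above: every node starts in its own set, Union empties one set into
        # another and repoints the representative lookup, so Find stays O(1)
        # because the lookup always names the current representative.
        def _union_find_sketch():
            sets = {n: {n} for n in "abc"}
            lookup = {n: n for n in "abc"}

            def union(a, b):
                sets[b] |= sets[a]
                for m in sets[a]:
                    lookup[m] = b
                sets[a] = set()

            union("a", "b")
            assert lookup["a"] == "b" and not sets["a"]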

        def merge_single_paths(merged_nodes, node_sets):
            """
            This algorithm ensures that fixpoints are propagated in simple paths.

            Notes:
                This must be done in-order (dfs preorder) to avoid useless path traversals;
                each unmapped node is visited exactly once.Process dominator tree
                bottom-up until a fixed point is found (mapped_nodes). Repeat this
                until all nodes from unmapped_nodes are processed.
            """
            nlist = SortedKeyList(
                iterable=unmapped_nodes,
                key=btfg_flow.get_dom_tree().get_preorder_number)
            # noinspection PyUnreachableCode
            for n in reversed(nlist):
                pred = list(btfg_flow.get_graph().predecessors(n))
                succ = list(btfg_flow.get_graph().successors(n))

                if len(pred) != 1 or len(succ) != 1:
                    continue

                dom_pred = list(
                    btfg_flow.get_dom_tree()._domTree.predecessors(n))
                dom_succ = list(
                    btfg_flow.get_dom_tree()._domTree.successors(n))
                assert len(dom_pred) == 1, "Dom tree not really a tree"

                # First nodes in nlist are leaf nodes in predominator tree
                if len(dom_succ) == 0:
                    # Assign to predecessor node in predom tree
                    log.debug("Assigning leaf {} to {}.".format(n, dom_pred))
                    # assert dom_pred[0] in mapped_nodes -> not needed.
                    Union(n, dom_pred[0])
                    merged_nodes.add(n)
                    continue
                else:
                    # Not a leaf node in predominator tree
                    assert len(dom_succ) == 1, "Invalid dominator tree."
                    last_succ = Find(dom_succ[0])
                    # prefer downwards
                    if last_succ in mapped_nodes:
                        # Assign this set to last_succ
                        log.debug(
                            "Assigned {} downwards to fixed point {}.".format(
                                n, last_succ))
                        Union(n, last_succ)
                        merged_nodes.add(n)
                        continue
                    else:
                        log.debug("Assigned {} upwards to point {}.".format(
                            n, dom_pred[0]))
                        # assert dom_pred[0] in mapped_nodes -> not needed
                        Union(n, dom_pred[0])
                        merged_nodes.add(n)
                        continue
                # --
                # noinspection PyUnreachableCode
                assert False, "should not land here"

            for n in merged_nodes:
                assert len(node_sets[n]) == 0, "Algorithm failed, must fix."
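
        # Hedged mini-example of the ordering both passes rely on
        # (sortedcontainers.SortedKeyList): entries stay sorted by the key, so
        # iterating in reverse visits the highest preorder numbers first, i.e.
        # nodes deep in the dominator tree before their dominators.
        def _preorder_order_sketch():
            from sortedcontainers import SortedKeyList
            preorder = {'entry': 0, 'a': 1, 'b': 2}
            nlist = SortedKeyList(iterable=preorder, key=preorder.get)
            assert list(reversed(nlist)) == ['b', 'a', 'entry']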

        def contract_dominator_tree(merged_nodes, node_sets):
            last_nodes = unmapped_nodes.difference(merged_nodes)

            # First pass over the remaining nodes to rule out specific cases.
            # Head nodes of simple paths are processed here.
            first_pass_merged_nodes = set()
            for n in last_nodes:

                pred = list(btfg_flow.get_graph().predecessors(n))
                succ = list(btfg_flow.get_graph().successors(n))

                # dom_pred = list(b_tfg.get_dom_tree()._domTree.predecessors(n))
                dom_succ = list(
                    btfg_flow.get_dom_tree()._domTree.successors(n))

                if len(pred) == 1 and len(succ) == 1:
                    assert False, "Something missing in the previous step, fix needed..."

                # A node with multiple in-edges and a single out-edge dominates
                # its successor in the dom tree; merge it downwards into the
                # fixed point if that point is already mapped or was mapped in
                # the previous step.
                if len(pred) > 1 and len(succ) == 1:
                    if len(dom_succ) != 1:
                        continue
                    # Check if node is merged in the previous step
                    last_succ = Find(succ[0])
                    if last_succ in mapped_nodes:
                        # Assign this node to last_succ
                        log.debug(
                            "Assigned node {} with mult_in_edg and single out_edg to {}."
                            .format(n, last_succ))
                        Union(n, last_succ)
                        first_pass_merged_nodes.add(n)

            merged_nodes = merged_nodes.union(first_pass_merged_nodes)
            last_nodes = last_nodes.difference(first_pass_merged_nodes)

            # Second pass over the remaining nodes: perform generic contraction,
            # either structurally (cutting the connected subgraphs of uncolored
            # nodes) or by processing nodes iteratively bottom-up in the
            # dominator tree. Uncolored nodes here are always assigned upwards
            # in the dominator tree.
            nlist = SortedKeyList(
                iterable=last_nodes,
                key=btfg_flow.get_dom_tree().get_preorder_number)

            for n in reversed(nlist):
                # Pred is already checked to have length 1, domTree is valid
                dom_pred = list(
                    btfg_flow.get_dom_tree()._domTree.predecessors(n))
                last_pred = Find(dom_pred[0])
                if last_pred in mapped_nodes:
                    # Already found a fixed point (simple_path)
                    log.debug(
                        "[2nd pass] Assigning {} to fixed node {}".format(
                            n, last_pred))
                    Union(n, last_pred)
                    merged_nodes.add(n)
                else:
                    # Assign upwards
                    # log.debug("[2nd pass] Assigning {} upwards to node {}".format(n, dom_pred[0]))
                    # Union(n, dom_pred[0])
                    log.debug(
                        "[2nd pass] Assigning {} upwards to node {}".format(
                            n, last_pred))
                    Union(n, last_pred)
                    merged_nodes.add(n)

            for n in last_nodes:
                assert len(node_sets[n]) == 0, "Algorithm failed, must fix."

            # FIXME: if execution order shall be maintained, we must preserve ctrl deps as well.

            # Yet another sanity check
            for n in btfg_flow.get_graph().nodes:
                m = Find(n)
                if m not in mapped_nodes:
                    assert False, "Not all unmapped nodes were assigned to mapped points.\n" \
                                  "Unmapped node {}.\n Node Sets: {}".format(m, node_sets)

        log.info("Completing mapping of {}".format(btfg.name))

        if self.do_render:
            eid = btfg_flow.get_entry_id()
            render.render_graph(btfg_flow.get_dom_tree()._domTree,
                                dir=tempfile.tempdir,
                                name=self.bFlow.name + '_domTree_' + str(eid),
                                prefix="bin",
                                topnode=eid)

        node_sets = {n: {n} for n in btfg_flow.get_graph().nodes}
        node_lookup = {n: n for n in btfg_flow.get_graph().nodes}
        merged_nodes = set()
        merge_single_paths(merged_nodes, node_sets)
        contract_dominator_tree(merged_nodes, node_sets)

        mapping_dict = dict()
        for n in btfg_flow.get_graph().nodes:
            matched_src_node = input_map[Find(n)]
            assert matched_src_node is not None
            mapping_dict[n] = matched_src_node
        # --
        smap = GraphMap(btfg_flow,
                        stfg_flow,
                        mapping_dict,
                        "overapprox.map",
                        predecessors=[input_map])
        report = dict(comment="no report implemented"
                      )  # FIXME: write a report of stuff done
        return smap, report
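
    # Toy illustration (hypothetical node ids) of what the two passes achieve:
    # every unmapped node ends up in the set of a mapped fixed point, so Find()
    # projects any binary node onto a node that input_map can resolve, e.g.
    #
    #   mapped_nodes = {1, 4}   # fixed points from input_map
    #   unmapped     = {2, 3}   # lumped along the dominator tree
    #   after contraction: Find(2) == Find(3) == 1 and Find(4) == 4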
    def _map_subgraph(self, input_map, btfg, stfg):
        assert isinstance(btfg, HierarchicalFlowGraph)
        assert isinstance(stfg, HierarchicalFlowGraph)

        def get_original_loop_id(tfg, regionId):
            assert isinstance(tfg, transformer.TransformedFlowGraph)
            # --
            # Get region collection
            tfg_regions = tfg.get_region_collection()
            assert regionId in tfg_regions._loopRegions
            # All nodes represent region IDs, the new ones must be reduced loops
            l_region = tfg_regions.get_region(regionId)
            assert l_region is not None, "Invalid region id."
            l_transf = l_region.get_transf()
            # --
            assert isinstance(l_transf, transformation.ReducedLoopTransf)
            return l_transf.get_header_node()

        def compute_ctrldep_map():
            """
            filter map using dominator homomorphism
            :returns GraphMap
            """
            def get_bb_ctrlprops(flow, tfg, ctrldep):
                """label dependent BBs with the labels of their immediate controlling edges
                :returns dict(edge_label in tfg: immediately controlled nodes in tfg)
                """
                g = tfg.flow.get_graph()
                node2edges = dict()  # nodes -> edge labels
                for e, controlled_nodes in ctrldep.iteritems():
                    if e in g.edges:
                        for c in controlled_nodes:
                            lbl = flow.digraph.edges[e][
                                'label']  # must not fail
                            if c not in node2edges:
                                node2edges[c] = set()
                            node2edges[c].add(lbl)
                # -- make them hashable
                node2cond = {
                    k: frozenset(v)
                    for k, v in node2edges.iteritems()
                }
                return node2cond

            def get_subgraph_ctrldeps():
                """filter flow-wide deps to only hold edges and nodes of this subgraph,
                and also remove self-dep of loop headers"""
                def get_and_filter_subgraph(tfg, flow):
                    ctrldep = flow.get_control_dependencies()
                    g = tfg.flow.get_graph()
                    ctrldep_here = {
                        k: set(
                            filter(lambda x: x in g.nodes and x != tfg.loop_id,
                                   v))
                        for k, v in ctrldep.iteritems() if k in g.edges
                    }
                    return ctrldep_here

                deps_bin = get_and_filter_subgraph(btfg, self.bFlow)
                deps_src = get_and_filter_subgraph(stfg, self.sFlow)
                return deps_bin, deps_src

            f_map = dict()
            f_map.update(fixed_points)
            log.info("Running ctrl-dep mapping on '{}'".format(btfg.name))
            log.debug("Fixed points={}".format(f_map.items()))

            #####################
            # ctrldep properties
            #####################
            ctrldep_bin, ctrldep_src = get_subgraph_ctrldeps()
            report["control-dependency"] = dict(
                bin={str(k): str(list(v))
                     for k, v in ctrldep_bin.iteritems()},
                src={str(k): str(list(v))
                     for k, v in ctrldep_src.iteritems()})
            bnode2ctrl = get_bb_ctrlprops(self.bFlow, btfg, ctrldep_bin)
            snode2ctrl = get_bb_ctrlprops(self.sFlow, stfg, ctrldep_src)
            report['node-ctrl-props'] = dict(
                bin={
                    k: " || ".join(list(v))
                    for k, v in bnode2ctrl.iteritems()
                },
                src={
                    k: " || ".join(list(v))
                    for k, v in snode2ctrl.iteritems()
                })

            ###########
            # matching
            ###########
            # reverse snode to match them
            rev = dict()
            for node, cond in snode2ctrl.iteritems():
                if cond not in rev:
                    rev[cond] = {node}
                else:
                    rev[cond].add(node)

            for bb, cond in bnode2ctrl.iteritems():
                snodes = rev.get(cond, {})
                if snodes:
                    log.debug("{}: bin-{} maps to src nodes: {}".format(
                        btfg.name, bb, snodes))
                    # If there are multiple src BBs, we can annotate to any of them
                    # however, some of them are loop headers. Do not annotate there.
                    found = False
                    for sn in snodes:
                        # FIXME: see paper whether they have addressed that one.
                        loc = self.sFlow.get_line_info(sn)
                        if 'min' in loc and loc['min'].get('l', 0) > 0:
                            f_map[bb] = sn
                            found = True
                            break
                    if not found:
                        log.debug("{} No valid src locations for {}".format(
                            btfg.name, bb))
                else:
                    log.debug("{}: no match for bin-{}, cond={}".format(
                        btfg.name, bb, cond))

            # --
            g = GraphMap(gA=btfg.flow,
                         gB=stfg.flow,
                         dict_map=f_map,
                         name="ctrl dependency")
            return g
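
        # Minimal sketch of the signature matching performed above: nodes on
        # both sides are keyed by the frozenset of edge labels that immediately
        # control them, and equal signatures are candidate matches. Names here
        # are illustrative only.
        def _match_by_signature(bin_sigs, src_sigs):
            """bin_sigs/src_sigs: dict(node -> frozenset(labels))"""
            rev = dict()
            for s_node, sig in src_sigs.items():
                rev.setdefault(sig, set()).add(s_node)
            return {b: rev[sig] for b, sig in bin_sigs.items() if sig in rev}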

        report = dict()
        flag_isCondensed = btfg.parent is None and stfg.parent is None

        ################
        # Render graphs
        ################
        if self.do_render:
            if flag_isCondensed:
                render.render_graph(btfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=btfg.name + "_allreduced",
                                    prefix="bin")
                render.render_graph(stfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=stfg.name + "_allreduced",
                                    prefix="src")
            else:
                render.render_graph(btfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=btfg.name,
                                    prefix="sub")
                render.render_graph(stfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=stfg.name,
                                    prefix="sub")

        ###############
        # Fixed-points
        ###############
        # 0.a Find new nodes, save original loop headers as fixed points
        fixed_points = dict()

        nodes_new_b = {
            n: get_original_loop_id(btfg.flow, n)
            for n in btfg.flow.get_graph().nodes
            if n > self.bFlow.get_max_id()
        }
        nodes_new_s = {
            n: get_original_loop_id(stfg.flow, n)
            for n in stfg.flow.get_graph().nodes
            if n > self.sFlow.get_max_id()
        }

        b_regions = btfg.flow.get_region_collection()
        s_regions = stfg.flow.get_region_collection()
        for n in nodes_new_b.keys():
            if not b_regions.is_loop_region_matched(n):
                continue
            # Get corresponding new source loop region id
            needed_sid = nodes_new_b[n]
            sub_b = btfg.find(needed_sid)
            partner_s = sub_b.partner
            assert partner_s is not None
            l_source_origin_id = partner_s.loop_id
            l_source_region_id = s_regions.get_loop_region_id(
                l_source_origin_id)
            assert l_source_region_id in nodes_new_s.keys()
            # Add fixed point
            fixed_points[n] = l_source_region_id

        # 0.b Add entry and exit node as fixed point for the condensed flow.
        if flag_isCondensed:
            assert btfg.flow.is_subflow() is not True, "Not a condensed flow."
            assert stfg.flow.is_subflow() is not True, "Not a condensed flow."
            fixed_points[btfg.flow.get_orig_flow_entry_id()] = \
                stfg.flow.get_orig_flow_entry_id()
            fixed_points[btfg.flow.get_orig_flow_exit_id()] = \
                stfg.flow.get_orig_flow_exit_id()

        # 0.c Add entry node of reduced flow if not condensed
        if not flag_isCondensed:
            assert btfg.flow.is_subflow() is True
            assert stfg.flow.is_subflow() is True
            # b_loop_id and s_loop_id are entry node id's
            fixed_points[btfg.loop_id] = stfg.loop_id

        log.debug("Fixed points: {}".format(fixed_points))

        ##########
        # Mapping
        ##########
        h_map = compute_ctrldep_map()
        report['ctrlDepMap'] = h_map
        report['trust-dbg-info'] = self.trust_dbg_columns
        # --
        return h_map, report
Example #4
def map_flows(bFlow,
              sFlow,
              mapper_name,
              hom_order,
              extLoopInfo=None,
              do_render=False,
              trust_dbg=False):
    """
    Establish a mapping between a pair of source and binary CFGs.
    Returns a hierarchical graph map that maps bin to source.
    FIXME: generalize hom_order into mapper arguments.
    """
    def report_tfg(report_dic, tfg, ident):
        """write some details about a transformed flow graph to the report"""
        assert ident not in report_dic, "Duplicate transformed flow graph."
        # --
        nodes_to_scan = [
            n for n in tfg.get_graph().nodes if n <= tfg._c_flow._maxId
        ]
        fcall_dict = {n: tfg._c_flow.get_func_calls(n) for n in nodes_to_scan}
        report_dic[ident] = {
            "edges": list(tfg.get_graph().edges),
            "nodes": list(tfg.get_graph().nodes),
            "fcalls": fcall_dict
        }

    def set_hierarchy_pairs(bhf, shf, matches_s2b, extLoopInfo):
        """Indicate which subgraphs in hierarchy are pairs, based on the loop matching."""
        def walk_level_b(bl):
            """mark those that shall be skipped"""
            if extLoopInfo and str(bl.loop_id) in extLoopInfo['loops']:
                # FIXME: extLoopInfo keys are strings
                bl.skip = True
                return
            for sub_bhf in bl.subflows:
                walk_level_b(sub_bhf)

        def walk_level_s2b(bl, sl):
            """pair all subflows at this level, and send pairs down for another walk"""
            bl.set_partner(sl)
            for sub_shf in sl.subflows:
                needed_bb = matches_s2b[sub_shf.loop_id]
                sub_bhf = None
                for this_sub_bhf in bl.subflows:
                    if this_sub_bhf.loop_id == needed_bb:
                        sub_bhf = this_sub_bhf
                        break
                assert sub_bhf is not None
                walk_level_s2b(sub_bhf, sub_shf)

        # --
        walk_level_b(bhf)
        walk_level_s2b(bhf, shf)

    def mark_matched_loops(matches, report_dic):
        """Mark which loops have been matched. Mainly for report"""
        b_rcoll = b_hflow.flow.get_region_collection()
        s_rcoll = s_hflow.flow.get_region_collection()
        for s_loop, b_loop in matches.items():
            b_r_id = b_rcoll.get_loop_region_id(b_loop)
            b_rcoll.mark_loop_region_as_matched(b_r_id)
            s_r_id = s_rcoll.get_loop_region_id(s_loop)
            s_rcoll.mark_loop_region_as_matched(s_r_id)
        # add more details for skipped binary loops (their corresponding binary loop region ids)
        sbl = report_dic.get("skipped_bin_loops", [])
        skipped_b_loops_region_ids = dict()
        b_loops_region_collection = b_hflow.flow.get_region_collection()
        for bLoop in sbl:
            skipped_b_loops_region_ids[
                bLoop] = b_loops_region_collection.get_loop_region_id(bLoop)
        report_dic["skipped_bin_r_ids"] = skipped_b_loops_region_ids

    def map_all(chosen_mapper):
        """
        Run the sequence of mappers and return the final mapping
        FIXME: it's a pipeline, implement it in a generic way
        """
        report["mapping_collection"].update(precise=dict(),
                                            linelump=dict(),
                                            domlump=dict(),
                                            complete=dict())
        # precise mapper:
        chosen_mapper.set_report(report["mapping_collection"]["precise"])
        pmap, b_hflow0, s_hflow0 = chosen_mapper.compute_mapping()
        pmap.consistency_check()

        # lumps some remaining nodes into their direct pre/succ:
        slmapper = StraightLineLumping(input_hmap=pmap,
                                       bFlow=bFlow,
                                       sFlow=sFlow,
                                       bhFlow=b_hflow0,
                                       shFlow=s_hflow0,
                                       do_render=False)
        slmapper.set_report(report["mapping_collection"]["linelump"])
        lmap, b_hflow1, s_hflow1 = slmapper.compute_mapping()
        lmap.consistency_check()
        lmap.add_predecessor(pmap)

        # lumps all remaining nodes into dominators:
        dlmapper = DominatorLumping(input_hmap=lmap,
                                    bFlow=bFlow,
                                    sFlow=sFlow,
                                    bhFlow=b_hflow1,
                                    shFlow=s_hflow1,
                                    do_render=do_render)
        dlmapper.set_report(report["mapping_collection"]["domlump"])
        dmap, b_hflow2, s_hflow2 = dlmapper.compute_mapping()
        dmap.consistency_check()
        dmap.add_predecessor(lmap)

        # handles skipped subflows:
        skipmapper = SkipMapper(input_hmap=dmap,
                                bFlow=bFlow,
                                sFlow=sFlow,
                                bhFlow=b_hflow2,
                                shFlow=s_hflow2,
                                annot=extLoopInfo,
                                do_render=False)
        skipmapper.set_report(report["mapping_collection"]["complete"])
        hmap, _, _ = skipmapper.compute_mapping()
        hmap.consistency_check()
        hmap.add_predecessor(dmap)

        # -- stats:
        stats = pmap.calc_statistics()  # MappingStatistics
        percent_precise = ((100. * stats.data['mapped']) / stats.data['total']
                           if stats.data['total'] > 0 else 0.)

        log.info(
            "Function '{}': Mapped {} (sub)graphs with {} nodes, {:.2f}% precise"
            .format(bFlow.name, stats.data['graphs'], stats.data['total'],
                    percent_precise))
        # --
        assert isinstance(hmap, gm.HierarchicalGraphMap)
        return hmap
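
    # A hedged sketch of the generic pipeline the FIXME in map_all() asks for;
    # the stage constructor keywords follow the calls above, everything else is
    # illustrative:
    def _run_lumping_pipeline(first_map, b_hf, s_hf, stages):
        """stages: iterable of (report_key, mapper_class, extra_kwargs)."""
        current = first_map
        for key, cls, extra in stages:
            m = cls(input_hmap=current, bFlow=bFlow, sFlow=sFlow,
                    bhFlow=b_hf, shFlow=s_hf, **extra)
            m.set_report(report["mapping_collection"].setdefault(key, dict()))
            nxt, b_hf, s_hf = m.compute_mapping()
            nxt.consistency_check()
            nxt.add_predecessor(current)
            current = nxt
        return current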

    ####################
    # Initialize report
    ####################
    report = {
        "bin_func_name": bFlow.name,
        "src_func_name": sFlow.name,
        "bb_timing": bFlow._blockTimes,
        "matched_loops": dict(),
        "flows_bin": dict(),
        "flows_src": dict(),
        "mapping_collection": dict(),
        "mapping_details": dict(),
        "max_bin_id": bFlow._maxId,
        "max_src_id": sFlow._maxId
    }

    #############
    # edge match
    #############
    rpt = edge_matcher.match(bFlow=bFlow,
                             sFlow=sFlow,
                             do_render=do_render,
                             trust_dbg=trust_dbg)
    report['edge-matches'] = rpt

    #############################################
    # Collapse loops -> condensed/reduced graphs
    #############################################
    # a subprogram with N loops produces N+1 TFGs (|b_l_tfgs| = N, |b_tfg| = 1) in a hierarchy
    b_hflow = transformer.get_reduced_hierarchy(bFlow)
    s_hflow = transformer.get_reduced_hierarchy(sFlow)
    report_tfg(report["flows_bin"], b_hflow.flow,
               "all_reduced")  # report top-level
    report_tfg(report["flows_src"], s_hflow.flow, "all_reduced")

    if do_render:
        render.render_graph(bFlow.postdom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=bFlow.name + '_postDomTree',
                            prefix="bin",
                            topnode=bFlow._exitId,
                            attrs=['num'])
        render.render_graph(bFlow.predom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=bFlow.name + '_domTree',
                            prefix="bin",
                            topnode=bFlow._entryId,
                            attrs=['num'])
        render.render_graph(sFlow.predom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=sFlow.name + '_domTree',
                            prefix="src",
                            topnode=sFlow._entryId,
                            attrs=['num'])

    #############################
    # Loops & Hierarchical decomp
    #############################
    # we match loops (as a whole, not their nodes) first. FIXME: why independent of flow reduction?
    matched_loops, rpt = loop_matcher.match(bFlow=bFlow,
                                            sFlow=sFlow,
                                            extLoopInfo=extLoopInfo,
                                            do_render=do_render)
    mark_matched_loops(matched_loops, report_dic=rpt)
    report["matched_loops"] = rpt
    set_hierarchy_pairs(b_hflow, s_hflow, matched_loops, extLoopInfo)

    ###########
    # ctrl dep
    ###########
    def mark_ctrl_edges(g, ebunch):
        for e in ebunch:
            g.edges[e]['ctrl'] = True

    ctrl_bin = bFlow.get_control_dependencies()
    ctrl_src = sFlow.get_control_dependencies()
    mark_ctrl_edges(bFlow.digraph, ctrl_bin.keys())
    mark_ctrl_edges(sFlow.digraph, ctrl_src.keys())
    report["control-dependency"] = dict(
        bin={str(k): str(list(v))
             for k, v in ctrl_bin.iteritems()},
        src={str(k): str(list(v))
             for k, v in ctrl_src.iteritems()})

    ##########
    # Mapping
    ##########
    def select_mapper():
        m = None
        if mapper_name == 'homomorphism':
            m = HomomorphismMapper(input_hmap=None,
                                   sFlow=sFlow,
                                   bFlow=bFlow,
                                   bhFlow=b_hflow,
                                   shFlow=s_hflow,
                                   hom_order=hom_order,
                                   trust_dbg=trust_dbg,
                                   do_render=do_render,
                                   check_inputs=True)
        elif mapper_name == 'ctrldep':
            m = CtrlDependencyMapper(input_hmap=None,
                                     sFlow=sFlow,
                                     bFlow=bFlow,
                                     bhFlow=b_hflow,
                                     shFlow=s_hflow,
                                     trust_dbg=trust_dbg,
                                     do_render=do_render,
                                     check_inputs=True)
        return m

    mapper = select_mapper()
    if mapper is None:
        log.error("Unknown mapper: {}".format(mapper_name))
        raise ValueError("Unknown mapper: {}".format(mapper_name))
    try:
        final_mapping = map_all(mapper)
    except Exception:
        import traceback
        traceback.print_exc()
        raise ValueError("Mapping failed for '{}'".format(bFlow.name))

    # --
    assert isinstance(final_mapping, gm.HierarchicalGraphMap)
    return final_mapping, report
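
# Hypothetical usage of map_flows (bFlow/sFlow construction is outside this
# snippet; the argument values follow the signature and orders used elsewhere):
#
#   hmap, rpt = map_flows(bFlow, sFlow,
#                         mapper_name='homomorphism',
#                         hom_order='predominator-first')
#   flat = hmap.flatten()  # bin BB -> src BB, as consumed by do_render_mapping
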
Example #5
def do_render_mapping(bFlow, sFlow, hierarchical_map, annot_file):
    """
    Render mapping for a CFG pair
    """
    def add_pre(s, p):
        return "{}{}".format(p, s)

    def copy_and_rename(gdst, gfrom, prefix, maxid):
        """copy nodes, edges and attributes from gsrc to gdst, while adding a prefix"""
        gdst.add_nodes_from([(add_pre(n, prefix), gfrom.nodes[n])
                             for n in gfrom.nodes
                             if n <= maxid])  # FIXME: not elegant with maxid
        gdst.add_edges_from([(add_pre(e[0], prefix), add_pre(e[1], prefix))
                             for e in gfrom.edges
                             if e[0] <= maxid and e[1] <= maxid])
        # node/edge attrs (skip helper nodes above maxid, which were not copied)
        for _n in gfrom.nodes:
            if _n > maxid:
                continue
            at = gfrom.nodes[_n]
            gdst.nodes[add_pre(_n, prefix)].update(at)
        for _e0, _e1 in gfrom.edges:
            if _e0 > maxid or _e1 > maxid:
                continue
            at = gfrom.edges[(_e0, _e1)]
            gdst.edges[(add_pre(_e0, prefix), add_pre(_e1, prefix))].update(at)
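
    # Side note (assumption: no maxid filtering needed): networkx can produce
    # the same prefixed disjoint union directly, attributes included, e.g.
    #
    #   merged = nx.union(bFlow.digraph, sFlow.digraph, rename=("b", "s"))
    #
    # The hand-rolled copy above is kept because it also drops the helper
    # nodes above maxid.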

    def build_clusters(hmap):
        """convert hierarchy into subgraph clusters for renderer"""
        assert isinstance(hmap, gm.HierarchicalGraphMap)
        parts = hmap.name.split("|")
        if len(parts) == 2:
            cbname = parts[0]
            csname = parts[1]
        else:
            cbname = "bin_{}".format(hmap.name)
            csname = "src_{}".format(hmap.name)
        cluster_bin = render.GraphCluster(cbname)
        cluster_src = render.GraphCluster(csname)
        # children
        for c in hmap.children:
            ccb, ccs = build_clusters(c)
            if ccb is not None:
                cluster_bin.add_child(ccb)
            if ccs is not None:
                cluster_src.add_child(ccs)
        # myself
        b_nodes = hmap.mapping.mapped() | hmap.mapping.unmapped()  # bin BBs!
        if hmap.mapping.graph_B is not None:
            s_nodes = hmap.mapping.graph_B.get_graph().nodes()
        else:
            s_nodes = set()
        if s_nodes:
            cluster_src.add_nodes(map(lambda x: add_pre(x, "s"), s_nodes))
            cluster_src.topnode = add_pre(hmap.mapping.graph_B.entryId, 's')
        if b_nodes:
            cluster_bin.add_nodes(map(lambda x: add_pre(x, "b"), b_nodes))
            cluster_bin.topnode = add_pre(hmap.mapping.graph_A.entryId, 'b')

        if cluster_src.empty():
            return cluster_bin, None
        else:
            cluster_bin.add_relative(cluster_src)
            return cluster_bin, cluster_src

    def locstr(loc):
        disc = loc.get('d', None)
        col = loc.get('c', 0)
        line = loc.get('l', 0)
        s = "{}".format(line)
        if col != 0:
            s += ":{}".format(col)
        if disc is not None:
            s += " ({})".format(disc)
        return s

    def decorate():
        """add some more informative attrs to graph"""
        def deco(flow):
            for n in flow.digraph.nodes:
                dec = dict()
                # line info
                lid = flow.get_line_info(n)
                if lid:
                    try:
                        dec = dict(begin=locstr(lid["begin"]),
                                   end=locstr(lid["end"]))
                        dec.update(
                            dict(min=locstr(lid["min"]),
                                 max=locstr(lid["max"])))
                    except KeyError:
                        pass
                # func calls
                fc = flow.get_func_calls(n)
                if fc:
                    dec['calls'] = str(fc)
                # timing
                if hasattr(flow, "_blockTimes"):
                    try:
                        dec['time'] = flow._blockTimes[n]
                    except KeyError:
                        pass
                # --
                flow.digraph.nodes[n].update(dec)

        deco(bFlow)
        deco(sFlow)

    decorate()

    ############################
    # copy both graphs into one
    ############################
    both_graphs = nx.DiGraph()
    both_graphs.graph.update(bFlow.digraph.graph)  # take graph attrs from bin
    copy_and_rename(both_graphs, bFlow.digraph, "b", bFlow.get_max_id())
    copy_and_rename(both_graphs, sFlow.digraph, "s", sFlow.get_max_id())

    # get cluster/subgraph hierarchy
    clusters = build_clusters(hierarchical_map)
    if not clusters[0].check_cluster():
        log.warning("Cluster bin of {} inconsistent".format(
            hierarchical_map.name))
    if clusters[1] is not None and not clusters[1].check_cluster():
        log.warning("Cluster src of {} inconsistent".format(
            hierarchical_map.name))

    # get the mapping itself
    flatmap = hierarchical_map.flatten()
    allmap = {
        add_pre(k, 'b'): add_pre(v, 's')
        for k, v in flatmap.get_map().iteritems()
    }

    # add one common entry/exit node for better visualization
    entries = ['b{}'.format(bFlow._entryId), 's{}'.format(sFlow._entryId)]
    exits = ['b{}'.format(bFlow._exitId), 's{}'.format(sFlow._exitId)]
    both_graphs.add_node("entry", shape='diamond')
    both_graphs.add_node("exit", shape='diamond')
    both_graphs.add_edges_from([('entry', n) for n in entries])
    both_graphs.add_edges_from([(n, 'exit') for n in exits])

    # mark control edges in color:
    for e in both_graphs.edges:
        if 'ctrl' in both_graphs.edges[e]:
            both_graphs.edges[e]['color'] = both_graphs.edges[e][
                'fontcolor'] = 'red'

    ##############################
    # mark precisely mapped nodes
    ##############################
    precise_nodes = set()
    try:
        precise_map = _get_last_precise_map(hierarchical_map)
        log.info("Rendering mapping of '{}' with precise map='{}'".format(
            hierarchical_map.name, precise_map.mapping.name()))

        # flatten and colorize nodes
        pflatmap = precise_map.flatten()
        mapped_nodes = [add_pre(n, 'b') for n in pflatmap.mapped()]
        precise_nodes |= set(mapped_nodes)
        for n in mapped_nodes:
            both_graphs.nodes[n].update(
                dict(fillcolor='darkolivegreen1', style='filled'))

    except AssertionError:
        log.warning("cannot highlight precisely mapped nodes in mapping")

    ###################
    # add mapping edges
    ###################
    if precise_nodes:
        mapping_edges = [(k, v) for k, v in allmap.iteritems()
                         if k in precise_nodes]
    else:
        mapping_edges = [(k, v) for k, v in allmap.iteritems()]
    both_graphs.add_edges_from(mapping_edges,
                               virtual=True,
                               color='gray80',
                               style='dashed',
                               constraint='False')

    # ... and finally render
    imgname = "map_" + hierarchical_map.name
    render.render_graph(G=both_graphs,
                        dir=tempfile.tempdir,
                        name=imgname,
                        interactive=True,
                        topnode='entry',
                        botnode='exit',
                        clusters=list(clusters),
                        keepfiles=False,
                        attrs=('color', 'fillcolor', 'fontcolor', 'style',
                               'shape', 'arrowhead', 'constraint', 'begin',
                               'end', 'min', 'max', 'time', 'calls'))
    def _map_subgraph(self, input_map, btfg, stfg):
        assert isinstance(btfg, HierarchicalFlowGraph)
        assert isinstance(stfg, HierarchicalFlowGraph)

        def compute_potential_map():
            """
            Using debug info and function calls, determine a potential mapping bin->[srcbb]
            :returns dict(binBB -> list(srcBB))
            """
            def get_sblocks_matching_dwarflines():
                """
                Find sBBs that match each dwarf line (i.e., ~address)
                :returns tuple (map_precise, map_fallback) where
                            map_precise: dw line -> src BB. considering column/discr info
                            map_fallback: dw line -> src BB. considering only line numbers
                """
                def verbose_unique():
                    """just debug output"""
                    nodes_unq_dwlines = self.bFlow.get_unique_dw_lines(
                        nodes_b)  # line -> unique BB
                    log.debug("Unique dwlines:")
                    for dwl_i, node in nodes_unq_dwlines.items():
                        log.debug("Found in node {}, dwl={}".format(
                            node, self.bFlow._dwData._dwData['LineInfoEntries']
                            [str(dwl_i)]))
                    log.debug("")

                # verbose_unique()

                # LUT
                allDwLines = dict()
                for n in nodes_b:
                    dwLines = self.bFlow._dwData.get_dw_lines(
                        self.bFlow.get_addr_ranges(n))
                    allDwLines.update(dwLines)

                # generate precise (line+col/discr; known to be unreliable with gcc)
                dw2src_map = dict()
                if self.trust_dbg_columns:
                    haveCol = False
                    for key, dwLine in allDwLines.items():
                        line = dwLine['LineNumber']
                        column = dwLine['LineOffset']
                        dw2src_map[key] = self.sFlow.find_source_block(
                            line, column, nodes_s)
                        haveCol = haveCol or (column != 0)
                    if not haveCol:
                        log.warning(
                            "No column numbers in debug info. Enable them to improve the mapping."
                        )

                # generate fallback (only by line number)
                lines = {dw['LineNumber'] for k, dw in allDwLines.items()}
                srcline2sbb = self.sFlow.find_source_blocks_line_only(
                    lines, nodes_s)
                dw2src_map_line_only = {
                    key: srcline2sbb[dw['LineNumber']]
                    for key, dw in allDwLines.items()
                }
                # --
                # maps contain ALL sBBs for those bBBs which have no debug info
                return dw2src_map, dw2src_map_line_only

            def add_refs_by_location():
                """for one bin-BB 'n', append potential src-equivalents to set p_b"""
                dwLines = self.bFlow._dwData.get_dw_lines(
                    self.bFlow.get_addr_ranges(n))
                for key, dwLine in dwLines.items():
                    mapped_source_block = dw2src_map_precise.get(key, None)
                    if mapped_source_block is None:
                        mapped_blocks = dw2src_map_fallback[key]
                        log.debug(
                            "dwline with key {} in block {} has following matching blocks"
                            "(line only): {}".format(key, n, mapped_blocks))
                        for b in mapped_blocks:
                            p_b.add(b)
                    else:
                        p_b.add(mapped_source_block)

            def add_refs_by_fcalls():
                b_fcalls = self.bFlow.get_func_calls(n)
                for f in b_fcalls:
                    if f not in s_funccalls_inv:
                        continue
                    for source_node in s_funccalls_inv[f]:
                        if source_node in nodes_s:
                            p_b.add(source_node)
                            log.debug(
                                "*********---- Added s_node fcall reference: {}"
                                .format(source_node))

            def add_refs_by_varaccess():
                # FIXME: implement matching by accessed variables
                pass

            # get potential maps: addr -> src-BBs
            dw2src_map_precise, dw2src_map_fallback = get_sblocks_matching_dwarflines(
            )
            # generate self-sorting list of potential src nodes for each bin node
            ret_map_bin2src = dict()
            for n in nodes_b:
                if self.hom_order_src == 'predominator-first':
                    # noinspection PyArgumentList
                    p_b = SortedSet(
                        key=self.sFlow.predom_tree().get_preorder_number)
                elif self.hom_order_src == 'postdominator-first':
                    # noinspection PyArgumentList
                    p_b = SortedSet(
                        key=self.sFlow.postdom_tree().get_preorder_number)
                elif self.hom_order_src == 'predominated-first':
                    # noinspection PyArgumentList
                    p_b = SortedSet(key=lambda x: -self.sFlow.predom_tree().
                                    get_preorder_number(x))
                elif self.hom_order_src == 'postdominated-first':
                    # noinspection PyArgumentList
                    p_b = SortedSet(key=lambda x: -self.sFlow.postdom_tree().
                                    get_preorder_number(x))
                else:
                    assert False, "Invalid argument (self.hom_order_src)."
                # fill the list:
                add_refs_by_location()
                add_refs_by_fcalls()
                add_refs_by_varaccess()
                ret_map_bin2src[n] = p_b
            # --
            return ret_map_bin2src  # bin node -> potential src nodes (SortedSet)
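
        # Hedged mini-example of the self-sorting container used above
        # (sortedcontainers.SortedSet with a key): iteration yields elements
        # ordered by the key, so the dominator preorder decides which candidate
        # select_reference() below tries first.
        def _sorted_set_sketch():
            from sortedcontainers import SortedSet
            s = SortedSet(key=lambda x: -x)  # "largest first"
            s.update([3, 1, 2])
            assert list(s) == [3, 2, 1]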

        def get_original_loop_id(tfg, regionId):
            assert isinstance(tfg, transformer.TransformedFlowGraph)
            # --
            # Get region collection
            tfg_regions = tfg.get_region_collection()
            assert regionId in tfg_regions._loopRegions
            # All nodes represent region IDs, the new ones must be reduced loops
            l_region = tfg_regions.get_region(regionId)
            assert l_region is not None, "Invalid region id."
            l_transf = l_region.get_transf()
            # --
            assert isinstance(l_transf, transformation.ReducedLoopTransf)
            return l_transf.get_header_node()

        def compute_dom_homomorphic_map():
            """
            filter map using dominator homomorphism
            :returns GraphMap
            """
            def translate_id(node_id, isBinary):
                """some IDs are newly inserted for collapsed graphs and do not exist in the
                original flow graph -- translate them to their original equivalent"""
                if isBinary:
                    if node_id > self.bFlow.get_max_id():
                        return nodes_new_b[node_id]
                else:
                    if node_id > self.sFlow.get_max_id():
                        return nodes_new_s[node_id]
                return node_id

            def test_homomorphism(binary_nodes):
                """Check whether all the mapping is valid so far"""

                failed_count = 0
                for b in binary_nodes:
                    for b_ in binary_nodes:
                        if b_ == b:
                            continue
                        a = f_map.get(b, None)
                        a_ = f_map.get(b_, None)
                        if a is None or a_ is None:
                            continue
                        # Get original IDs for dominance check
                        og_b = translate_id(b, True)
                        og_b_ = translate_id(b_, True)
                        og_a = translate_id(a, False)
                        og_a_ = translate_id(a_, False)
                        log.debug("b,b_={},{};  a,a_={},{}".format(
                            b, b_, a, a_))
                        log.debug("og_b,og_b_={},{};  og_a,og_a_={},{}".format(
                            og_b, og_b_, og_a, og_a_))
                        if self.bFlow.predom_tree().test_dominance(og_b, og_b_) != \
                                self.sFlow.predom_tree().test_dominance(og_a, og_a_) or \
                                self.bFlow.predom_tree().test_dominance(og_b_, og_b) != \
                                self.sFlow.predom_tree().test_dominance(og_a_, og_a):
                            log.debug(
                                "bin_dominance={}, src_dominance={}".format(
                                    self.bFlow.predom_tree().test_dominance(
                                        og_b, og_b_),
                                    self.sFlow.predom_tree().test_dominance(
                                        og_a, og_a_)))
                            log.debug(
                                "Preorder numbers og_b,og_b_: {},{}".format(
                                    self.bFlow.predom_tree(
                                    ).get_preorder_number(og_b),
                                    self.bFlow.predom_tree().
                                    get_preorder_number(og_b_)))
                            log.debug(
                                "Preorder numbers og_a,og_a_: {},{}".format(
                                    self.sFlow.predom_tree(
                                    ).get_preorder_number(og_a),
                                    self.sFlow.predom_tree().
                                    get_preorder_number(og_a_)))
                            add_back_to_worklist(b)
                            add_back_to_worklist(b_)
                            failed_count += 1
                            log.debug("Homomorphism failed")
                return failed_count

            def add_back_to_worklist(b):
                if b in fixed_points:
                    return
                worklist.add(b)
                f_map[b] = None

            def check_conflict(r, b):
                """check if src-bb r is known to be a bad choice for bin-bb b,
                given the current state of the mapping.
                """
                if r not in f_confl[b]:
                    return False
                # see if any of the known conflicts are already in the map
                hasConflict = False
                for b_, r_ in f_confl[b][r]:
                    if f_map.get(
                            b_,
                            None) == r_:  # is the conflicting one in the map?
                        log.debug(
                            "conflict: {}->{} not allowed because {}->{} in mapping"
                            .format(b, r, b_, r_))
                        hasConflict = True
                        break
                return hasConflict

            def select_reference(b):
                """Among possible references, return the first non-conflicting one"""
                p_b = potential_map_bin2src[b]
                for r in p_b:
                    if not check_conflict(r, b):
                        return r
                return None

            def add_conflict(b, a, b_, a_):
                """
                Store that b->a and b->a' are conflicting decisions
                b*= binary, a*=source
                """
                if a not in f_confl[b]:
                    f_confl[b][a] = set()
                if a_ not in f_confl[b_]:
                    f_confl[b_][a_] = set()
                f_confl[b][a].add((b_, a_))  # b->a conflicts with b'->a'
                f_confl[b_][a_].add((b, a))  # b'->a conflicts with b->a
                log.debug("{}->{} conflicts with {}->{}".format(b, a, b_, a_))

            def remove_ambiguous():
                """Remove all entries from f_map that where we could have confused siblings"""
                def do_level(node):
                    """Dive down dom tree, and check for ambiguity at each level"""
                    mapped_by = dict()  # src-bb -> bin-bb in this btfg
                    for ch in pdt.successors(node):
                        # if it has children, their dominance relationships make it unambiguous
                        if ch in f_map and pdt.out_degree(ch) == 0:
                            srcbbs = potential_map_bin2src[ch]
                            for sbb in srcbbs:
                                if sbb not in mapped_by:
                                    mapped_by[sbb] = set()
                                mapped_by[sbb].add(ch)
                    # remove those which have multiple src locations
                    delbb = {
                        bb
                        for _, bbb in mapped_by.iteritems() if len(bbb) > 1
                        for bb in bbb
                    }
                    if delbb:
                        ambiguous_bbb.update(delbb)
                        for db in delbb:
                            del f_map[db]
                    # dive down
                    for ch in pdt.successors(node):
                        do_level(ch)

                ambiguous_bbb = set()
                pdt = self.bFlow.predom_tree().get_tree()
                do_level(self.bFlow.predom_tree().get_root())
                # --
                return ambiguous_bbb

            log.info(
                "Running dominator homomorphism mapping on '{}', order: {}".
                format(btfg.name, self.hom_order))
            if self.hom_order == 'predominated-first':
                worklist = SortedKeyList(iterable=nodes_b,
                                         key=lambda x: -self.bFlow.predom_tree(
                                         ).get_preorder_number(x))
            elif self.hom_order == 'postdominated-first':
                worklist = SortedKeyList(iterable=nodes_b,
                                         key=lambda x: -self.bFlow.
                                         postdom_tree().get_preorder_number(x))
            elif self.hom_order == 'predominator-first':
                worklist = SortedKeyList(
                    iterable=nodes_b,
                    key=self.bFlow.predom_tree().get_preorder_number)
            elif self.hom_order == 'postdominator-first':
                worklist = SortedKeyList(
                    iterable=nodes_b,
                    key=self.bFlow.postdom_tree().get_preorder_number)
            else:
                assert False, "Invalid argument (self.hom_order)."

            # Add known relations between entry and exit nodes of subgraphs & test for safety
            f_map = dict()
            f_map.update(fixed_points)
            log.debug("Fixed points={}".format(f_map.items()))
            assert test_homomorphism(f_map.keys()) == 0, \
                "Initial homomorphism test failed for fixed points."

            f_confl = {n: dict() for n in nodes_b}
            f_confl.update({n: dict() for n in fixed_points.keys()})
            log.debug("Initial worklist={}".format(worklist))
            rounds = 0
            while len(worklist) > 0:
                rounds += 1
                # Select non conflicting elements for all in worklist
                for _ in range(len(worklist)):
                    # the SortedKeyList key above already encodes the chosen
                    # order (dominator- vs dominated-first), so the next
                    # candidate is always at the front of the worklist
                    b = worklist.pop(0)
                    log.debug("Current worklist element: {}".format(b))
                    if b in fixed_points.keys():
                        continue  # don't touch
                    a = select_reference(
                        b)  # multiple b's might pull the same a here.
                    if a is None:
                        log.debug("Only conflicting references for {} left...".
                                  format(b))
                        continue
                    else:
                        f_map[b] = a
                        if not self.quick:
                            # avoids spurious conflicts, but is at least O(n^3)
                            break
                # Test for homomorphism and reject those violating it
                rejected = False
                test_nodes = {
                    k
                    for k, v in f_map.iteritems() if v is not None
                }  # was: nodes_b
                for b in test_nodes:  # reversing improves run-time (heuristic)
                    for b_ in test_nodes:
                        if b_ == b:
                            continue
                        a = f_map.get(b, None)
                        a_ = f_map.get(b_, None)
                        if a is None or a_ is None:  # could still be None if we removed it
                            continue
                        # FIXME: could cache the following
                        fwd_fail = self.bFlow.predom_tree().test_dominance(
                            translate_id(b, True), translate_id(b_, True)) != \
                            self.sFlow.predom_tree().test_dominance(
                                translate_id(a, False), translate_id(a_, False))
                        rev_fail = self.bFlow.predom_tree().test_dominance(
                            translate_id(b_, True), translate_id(b, True)) != \
                            self.sFlow.predom_tree().test_dominance(
                                translate_id(a_, False), translate_id(a, False))
                        if fwd_fail or rev_fail:
                            log.debug(
                                "Dominance check failed: b,a=({},{}) ; b_,a_=({},{})"
                                .format(b, a, b_, a_) +
                                ". Fail type: {}".format(
                                    'both' if fwd_fail and rev_fail else
                                    ('fwd' if fwd_fail else 'rev')))
                            add_conflict(b, a, b_, a_)
                            add_back_to_worklist(b)  # and remove from map
                            add_back_to_worklist(b_)
                            rejected = True
                if not rejected:
                    log.debug("Nothing was rejected by homomorphism")
                log.debug("Map after {} rounds: {}".format(
                    rounds,
                    {k: v
                     for k, v in f_map.iteritems() if v is not None}))
            log.debug(
                "Homomorphism mapper finished on {} after {} rounds".format(
                    btfg.name, rounds))
            # some indistinguishable BBs might have been mapped; remove them to avoid mixing them up.
            rem_bbs = remove_ambiguous()
            if rem_bbs:
                log.info("{}: Removed {} ambiguous map entries: {}".format(
                    btfg.name, len(rem_bbs), rem_bbs))
            report['ambiguous-bin'] = rem_bbs
            # --
            g = GraphMap(gA=btfg.flow,
                         gB=stfg.flow,
                         dict_map=f_map,
                         name="dominator homomorphism")
            return g
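
        # Toy illustration (hypothetical dominance oracles) of the invariant
        # test_homomorphism() enforces: a map m is acceptable only if dominance
        # is preserved in both directions for every mapped pair.
        def _dominance_preserved(dom_bin, dom_src, m):
            """dom_bin/dom_src: callables (x, y) -> bool; m: dict bin->src."""
            return all(
                dom_bin(b, b_) == dom_src(m[b], m[b_])
                for b in m for b_ in m if b != b_)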

        report = dict()
        flag_isCondensed = btfg.parent is None and stfg.parent is None

        ################
        # Render graphs
        ################
        if self.do_render:
            if flag_isCondensed:
                render.render_graph(btfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=btfg.name + "_allreduced",
                                    prefix="bin")
                render.render_graph(stfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=stfg.name + "_allreduced",
                                    prefix="src")
            else:
                render.render_graph(btfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=btfg.name,
                                    prefix="sub")
                render.render_graph(stfg.flow.get_graph(),
                                    dir=tempfile.tempdir,
                                    name=stfg.name,
                                    prefix="sub")

        ###############
        # Fixed-points
        ###############
        # 0.a Find new nodes, save original loop headers as fixed points
        nodes_b = [
            n for n in btfg.flow.get_graph().nodes
            if n <= self.bFlow.get_max_id()
        ]
        nodes_s = [
            n for n in stfg.flow.get_graph().nodes
            if n <= self.sFlow.get_max_id()
        ]
        fixed_points = dict()

        nodes_new_b = {
            n: get_original_loop_id(btfg.flow, n)
            for n in btfg.flow.get_graph().nodes
            if n > self.bFlow.get_max_id()
        }
        nodes_new_s = {
            n: get_original_loop_id(stfg.flow, n)
            for n in stfg.flow.get_graph().nodes
            if n > self.sFlow.get_max_id()
        }
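        # Illustration (made-up ids): with get_max_id() == 100, graph nodes
        # {5, 42, 101} split into original nodes {5, 42} and the collapsed
        # loop-region node {101}, whose original loop header is recovered via
        # get_original_loop_id().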

        b_regions = btfg.flow.get_region_collection()
        s_regions = stfg.flow.get_region_collection()
        for n in nodes_new_b:
            if not b_regions.is_loop_region_matched(n):
                continue
            # Get corresponding new source loop region id
            needed_sid = nodes_new_b[n]
            sub_b = btfg.find(needed_sid)
            partner_s = sub_b.partner
            assert partner_s is not None
            l_source_origin_id = partner_s.loop_id
            l_source_region_id = s_regions.get_loop_region_id(
                l_source_origin_id)
            assert l_source_region_id in nodes_new_s
            # Add fixed point
            fixed_points[n] = l_source_region_id
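        # The lookup chain above, schematically:
        #   n (bin region node) -> nodes_new_b[n] (original bin loop id)
        #     -> btfg.find(...).partner (matched source subflow)
        #     -> partner_s.loop_id -> s_regions.get_loop_region_id(...) (src region node)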

        # 0.b Add entry and exit node as fixed point for the condensed flow.
        if flag_isCondensed:
            assert btfg.flow.is_subflow() is not True, "Not a condensed flow."
            assert stfg.flow.is_subflow() is not True, "Not a condensed flow."
            fixed_points[btfg.flow.get_orig_flow_entry_id()] = \
                stfg.flow.get_orig_flow_entry_id()
            fixed_points[btfg.flow.get_orig_flow_exit_id()] = \
                stfg.flow.get_orig_flow_exit_id()

        # 0.c Add entry node of reduced flow if not condensed
        if not flag_isCondensed:
            assert btfg.flow.is_subflow() is True
            assert stfg.flow.is_subflow() is True
            # b_loop_id and s_loop_id are entry node id's
            fixed_points[btfg.loop_id] = stfg.loop_id

        log.debug("Fixed points: {}".format(fixed_points))

        # Build inverse map for function calls found in source flow graph.
        s_funccalls_inv = dict()
        for n in nodes_s:
            fcalls = self.sFlow.get_func_calls(n)
            for f in fcalls:
                el = s_funccalls_inv.get(f, None)
                if el is None:
                    s_funccalls_inv[f] = {n}
                    continue
                s_funccalls_inv[f].add(n)
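        # Equivalent construction with collections.defaultdict (sketch only,
        # not used here so the original structure stays intact):
        #   s_funccalls_inv = collections.defaultdict(set)
        #   for n in nodes_s:
        #       for f in self.sFlow.get_func_calls(n):
        #           s_funccalls_inv[f].add(n)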

        #########################################
        # find potential map (based on dbg info)
        #########################################
        # FIXME: Process fcalls and variable accesses after initial homomorphism mapping?
        potential_map_bin2src = compute_potential_map()
        report['dbgMap'] = {
            k: list(v)
            for k, v in potential_map_bin2src.items()
        }

        # Source line info
        log.debug("Source line info:")
        for n in nodes_s:
            log.debug("Node {}, lInfo={}".format(n,
                                                 self.sFlow.get_line_info(n)))

        ##########################
        # Filter by homomorphism
        ##########################
        h_map = compute_dom_homomorphic_map()

        report['domHomomorphMap'] = h_map
        report['trust-dbg-info'] = self.trust_dbg_columns
        # --
        return h_map, report
Example #7
0
from flow import render
import dwarf  # assumed module providing DwarfData (used below)
import logging
import coloredlogs


# Set up logging
logging.basicConfig()
coloredlogs.install(level='DEBUG', fmt='[%(levelname)s] <%(name)s> %(message)s')
log = logging.getLogger()

# Main
dwData = dwarf.DwarfData('./test/benchmarks/maxleaf/debug.json')

# Render subprogram tree
subprogTreeNodes = [node for node in dwData._dieTree.nodes if dwData._dieTree.nodes[node]['tag'] in
                    ['DW_TAG_compile_unit', 'DW_TAG_subprogram', 'DW_TAG_inlined_subroutine',
                     'DW_TAG_lexical_block']]
subprogTreeNodes += [0]
finalNodes = [0]
subprogTree = dwData._dieTree.subgraph(subprogTreeNodes)

for node in subprogTree.nodes:
    die = subprogTree.nodes[node]
    if die['tag'] == 'DW_TAG_compile_unit' and subprogTree.out_degree(node) == 0:
        continue

    finalNodes.append(node)

finalTree = subprogTree.subgraph(finalNodes)
render.render_graph(finalTree, name='subprog_tree', attrs=['tag', 'attrs'])
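
# Note on the filtering above: compile-unit DIEs without children in the
# subprogram subgraph are dropped, while node 0 (the artificial root) is
# always kept so the rendered tree stays rooted.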
    def _match_loop_trees():
        """
        Returns potential matches of loops between binary and source control flows.

        Uses (from enclosing scope)
            bFlow: Binary flow object.
            sFlow: Source flow object.

        Return
            Tuple (map_loop_binary, map_loop_source): a dict mapping bFlow loop
            headers to sFlow loop headers (None if unmatched), and the inverse
            map from sFlow loop headers to sets of bFlow loop headers.
        """
        assert isinstance(bFlow, fparser.control_flow.BinaryControlFlow)
        assert isinstance(sFlow, fparser.control_flow.SourceControlFlow)

        def find_unq_dw(bFlow):
            """
            Find unique dwarf lines for each loop. If one line is ref'd at multiple levels,
            then the innermost wins (process innermost nesting first). See THESIS-Sect. 5.2.1.
            """
            assert isinstance(bFlow, fparser.control_flow.BinaryControlFlow)
            lInfo = bFlow.get_loop_info()

            # Discard the root node; sort loop tree nodes by descending preorder number
            sorted_plist = [(lInfo.get_preorder_number(n), n)
                            for n in lInfo._lTree.nodes if n != lInfo._rootId]
            sorted_plist = sorted(sorted_plist, reverse=True, key=lambda tup: tup[0])

            dwUnqMap = {}
            dwLinesAll = {}
            blockKeys = {}
            processedKeys = set()
            for _, n in sorted_plist:
                # log.debug("Finding unique dwLines for loop header {}...".format(n))
                # Get loop nodes
                ln = {n}
                bn = lInfo.get_body_nodes(n)
                if bn is not None:
                    ln = ln.union(bn)

                new_keys_all = set()
                for b in ln:
                    # log.debug("Processing block {}...".format(b))
                    ranges = bFlow.get_addr_ranges(b)
                    dwLines = bFlow._dwData.get_dw_lines(ranges)
                    dwLinesAll.update(dwLines)
                    blockKeys[b] = set(dwLines.keys())

                    # for k, l in dwLines.items():
                    #     log.debug("DwLine({})={}".format(k, l))

                    # Keep only lines not already claimed by an inner loop
                    new_keys = set(dwLines.keys()) - processedKeys
                    processedKeys.update(new_keys)
                    # log.debug("Unique keys: {}".format(new_keys))
                    new_keys_all = new_keys_all.union(new_keys)
                dwUnqMap[n] = new_keys_all

            # Print unique dwarf line map
            for k, v in dwUnqMap.items():
                log.debug("Printing unique dwLines for loop block {}:".format(k))
                for l in v:
                    log.debug("dwLine({}): {}".format(l, dwLinesAll[l]))
                log.debug("")
            # --
            return dwUnqMap, dwLinesAll
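
        def _sketch_innermost_wins():
            # Minimal, self-contained sketch (hypothetical line sets, loops
            # listed innermost-first) of the set-difference rule that
            # find_unq_dw applies per loop; defined for illustration only.
            seen, unique = set(), {}
            for loop, lines in [('inner', {10, 11}), ('outer', {10, 11, 12})]:
                unique[loop] = lines - seen  # only lines not claimed deeper
                seen |= lines
            return unique  # {'inner': {10, 11}, 'outer': {12}}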

        def get_sorted_plist(flow):
            assert isinstance(flow, fparser.control_flow.ControlFlow)
            lInfo = flow.get_loop_info()
            # Discard the root node; sort loop tree nodes by descending preorder number
            sorted_plist = [(lInfo.get_preorder_number(n), n)
                            for n in lInfo._lTree.nodes if n != lInfo._rootId]
            sorted_plist = sorted(sorted_plist, reverse=True, key=lambda tup: tup[0])
            return sorted_plist

        def get_loop_ranges(sFlow, sorted_plist):
            """
            Get the source ranges of loops
            Return
                Dict keyed by loop nodes in source flow, with values consisting of tuples
                (l_min, l_max), where l_min and l_max are dictionaries of the form
                    {'l': line, 'c': column, 'd': discriminator}

            Note
                An AssertionError is raised if a source loop is contained in a single
                line (improper formatting).
            """
            assert isinstance(sFlow, fparser.control_flow.SourceControlFlow)
            lInfo = sFlow.get_loop_info()
            minmax = {}

            for _, lh in sorted_plist:
                bn = lInfo.get_body_nodes(lh)
                if bn is None:
                    bn = {lh}
                else:
                    bn = bn.union([lh])

                for n in bn:
                    l_info = sFlow.get_line_info(n)
                    # l_info holds min/max dicts of the form
                    # {'l': line, 'c': column, 'd': discriminator}; d is always 0.
                    l_min = l_info['min']
                    l_max = l_info['max']
                    minmax[n] = (l_min, l_max)
                    if sFlow.is_virtual_node(n):
                        assert n != lh, "Header node in source loop is virtual."
                        continue
                    assert l_min != l_max, "Invalid line info for source node " \
                        "n={} , min {}, max {}.".format(n, l_min, l_max)

            return minmax

        def get_loop_tree(sFlow, lines_minmax, sorted_plist):
            """Returns a loop tree where each node contains a line range 'r' as attr."""

            def get_loop_min_max(lInfo, lh):
                # Returns min, max line found in loop body nodes given loop header lh.
                mm = lines_minmax[lh]
                line_min = mm[0]['l']
                line_max = mm[0]['l']

                bn = lInfo.get_body_nodes(lh)
                if bn is None:
                    return line_min, line_max
                for n in bn:
                    if sFlow.is_virtual_node(n):
                        continue

                    l_min, l_max = lines_minmax[n]
                    if l_max['l'] > line_max:
                        line_max = l_max['l']
                    if l_min['l'] < line_min:
                        line_min = l_min['l']

                return line_min, line_max

            lInfo = sFlow.get_loop_info()
            rTree = nx.DiGraph()
            rTree.add_nodes_from(lInfo._lTree.nodes)
            rTree.add_edges_from(lInfo._lTree.edges)
            rTree.graph['root'] = lInfo._rootId

            for n in rTree.nodes:
                if n == rTree.graph['root']:
                    continue
                line_min, line_max = get_loop_min_max(lInfo, n)
                # annotate some info
                rTree.nodes[n].update(lInfo._lTree.nodes[n])
                rTree.nodes[n]['line_min'] = int(line_min)
                rTree.nodes[n]['line_max'] = int(line_max)
                # --
                if len(list(rTree.successors(n))) == 0:
                    if line_min == line_max:
                        log.warning("Loop {} contained in a single line only.".format(n))
                    continue
                assert line_min != line_max, \
                    "Source loop in single line, min {}, max {}".format(line_min, line_max)

            # Propagate the maximum line number from inner loops to outer loops
            for _, n in sorted_plist:
                pre = list(rTree.predecessors(n))
                # Skip outermost loops
                if pre == [rTree.graph['root']]:
                    continue
                assert len(pre) == 1
                p = pre[0]

                n_line_max = rTree.nodes[n]['line_max']
                p_line_max = rTree.nodes[p]['line_max']
                if n_line_max > p_line_max:
                    rTree.nodes[p]['line_max'] = n_line_max
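            # Example (illustrative numbers): if an inner loop spans lines
            # 10..20 but its parent was annotated 8..15, the parent's
            # line_max is raised to 20 so nesting ranges stay consistent.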

            # Sort the outermost loops, save the sorted list of outermost loops ids.
            ol = [(n, rTree.nodes[n]['line_min']) for n in rTree.successors(rTree.graph['root'])]
            ol = sorted(ol, key=lambda tup: tup[1])
            rTree.graph['ol_sorted'] = [n for n, _ in ol]
            # --
            return rTree

        def get_source_loop(rTree, line):
            """
            Returns the loop tree node the given line corresponds to.

            Note
                - Source loops are assumed to be properly formatted, i.e. not contained
                  in a single line, so they can be properly distinguished.
                - If a given line falls outside the scope of every outermost loop,
                  i.e. it is not part of any SCC, None is returned (with a warning
                  if the line precedes the nearest loop).

            Args
                rTree : Source loop tree with annotated line ranges.
                line  : Source file line.

            Return
                Node id in source loop tree.
            """

            def visit_node(n, line):
                l_min = rTree.nodes[n]['line_min']
                l_max = rTree.nodes[n]['line_max']
                if l_min <= line <= l_max:
                    # log.debug("Visiting node n={}, line in range min,max={},{}".format
                    # (n, l_min, l_max))
                    if len(list(rTree.successors(n))) == 0:
                        return n
                    for s in rTree.successors(n):
                        # log.debug("Recursive with s={}".format(s))
                        res = visit_node(s, line)
                        if res is not None:  # avoid the duplicate recursive call
                            return res
                    return n
                else:
                    # log.debug("Visiting node n={}, line not in range min,max={},{}.".format
                    # (n, l_min, l_max))
                    return None

            # 1. Get root children
            ol_sorted = rTree.graph['ol_sorted']

            # 2. Iterate over all root children,
            for n in ol_sorted:
                l_min = rTree.nodes[n]['line_min']
                # log.debug("Searching line {}, in loop node n={}, l_min={}".format(line, n, l_min))
                if line < l_min:
                    log.warning(
                        "Found line out of loop scope. line={}, l_min={}".format(line, l_min))
                    return None
                res = visit_node(n, line)
                # log.debug("Result from visiting node is: {}".format(res))
                if res is not None:
                    return res
                else:
                    # Search other loops
                    continue

            # If we land here, something went wrong: the line lies past the last
            # loop, or (most likely) it belongs to a compiler-introduced loop
            # that is not contained in any SCC of the flow under analysis.
            return None
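
        # Behavior sketch of get_source_loop (illustrative ranges): with an
        # outermost loop spanning lines 5..30 and a nested loop 10..20, line 12
        # resolves to the inner loop, line 25 to the outer loop, and line 3
        # logs the out-of-scope warning and yields None.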

        # Get loop info
        blInfo = bFlow.get_loop_info()
        slInfo = sFlow.get_loop_info()

        dwUnqMap, dwLinesAll = find_unq_dw(bFlow)

        s_sorted_plist = get_sorted_plist(sFlow)
        b_sorted_plist_r = reversed(get_sorted_plist(bFlow))

        s_lines_minmax = get_loop_ranges(sFlow, s_sorted_plist)
        s_rTree = get_loop_tree(sFlow, s_lines_minmax, s_sorted_plist)

        # export loop tree
        if do_render:
            render.render_graph(s_rTree, dir=tempfile.tempdir,
                                name=sFlow.name + '_looptree', prefix="src",
                                topnode=s_rTree.graph['root'],
                                attrs=['line_min', 'line_max', 'body', 'backPreds'])

        # Map dwLines to source loops
        map_loop_binary = {}
        map_loop_source = {k: set() for _, k in s_sorted_plist}

        for _, b_n in b_sorted_plist_r:
            # Get unique dw keys for this block
            dwLines = dwUnqMap[b_n]
            parent_node = blInfo.get_parent_node(b_n)

            log.debug("")
            log.debug("Matching dw line info for binary loop header node {}:".format(b_n))

            # This set may be empty if a loop is duplicated in the binary code,
            # since sibling loops in the binary loop tree are processed in
            # arbitrary order. TODO: handle this case.
            assert len(dwLines) != 0, "No info for this loop"

            min_dwl = None
            max_dwl = None
            min_sn = None
            max_sn = None
            for l in dwLines:
                dwLine = dwLinesAll[l]
                s_n = get_source_loop(s_rTree, dwLine['LineNumber'])

                if parent_node != blInfo._rootId:
                    assert parent_node in map_loop_binary
                    # TODO: New loops correspond to single dwLines, add extra check
                    #       for this case elsewhere.
                    if s_n == map_loop_binary[parent_node] and len(dwLines) > 1:
                        log.warning("Ignoring dwLine({}).".format(l))
                        continue
                if s_n is None:
                    log.info("Ignoring dwLine({}), not contained in any source loop.".format(l))
                    continue

                if min_dwl is None or dwLine['LineNumber'] < min_dwl:
                    min_dwl = dwLine['LineNumber']
                    min_sn = s_n
                if max_dwl is None or dwLine['LineNumber'] > max_dwl:
                    max_dwl = dwLine['LineNumber']
                    max_sn = s_n

                log.debug("dwLine({}) is matched to source loop {}".format(l, s_n))

            if extLoopInfo is not None:
                loop_info = extLoopInfo["loops"].get(str(b_n), {})
                if loop_info.get("skip") == "True":
                    log.info("Not matching binary loop {}, ".format(b_n) +
                             "skip flag set in external loop info file.")
                    map_loop_binary[b_n] = None
                    continue

            if min_sn is None or max_sn is None:
                log.debug("min_dwl={}, max_dwl={}".format(min_dwl, max_dwl))
                log.debug("min_sn={}, max_sn={}".format(min_sn, max_sn))
                log.warning("Could not match binary loop {}.".format(b_n))
                map_loop_binary[b_n] = None
                continue

            p_min = slInfo.get_preorder_number(min_sn)
            p_max = slInfo.get_preorder_number(max_sn)
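            # Tie-break: prefer the candidate source loop with the smaller
            # preorder number, i.e. the one encountered earlier in the loop
            # tree traversal.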
            if p_min < p_max:
                matched_loop = min_sn
            else:
                matched_loop = max_sn

            log.debug("Matched to source loop {}.".format(matched_loop))
            map_loop_binary[b_n] = matched_loop
            map_loop_source[matched_loop].add(b_n)

        return map_loop_binary, map_loop_source
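
        # Usage note: map_loop_binary answers "which source loop does binary
        # loop b_n match?" (None if unmatched or skipped), while
        # map_loop_source collects, per source loop, every binary loop mapped
        # onto it (possibly several after loop duplication in the binary).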