def do_render_flows(bFlow, sFlow, attrs=None):
    """Render a binary/source flow pair."""
    bAttrs = attrs if attrs is not None else dict(attrs=bFlow.attrKeys)
    sAttrs = attrs if attrs is not None else dict(attrs=sFlow.attrKeys)
    render.render_graph(bFlow.digraph, dir=tempfile.tempdir, name=bFlow.name,
                        prefix="bin", attrs=bAttrs, interactive=True)
    render.render_graph(sFlow.digraph, dir=tempfile.tempdir, name=sFlow.name,
                        prefix="src", attrs=sAttrs, interactive=True)
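
# Illustrative usage sketch (not part of the tool): render_graph works for any
# DiGraph whose nodes carry the attributes listed in 'attrs'. The toy graph and
# its 'num' attribute below are hypothetical; only the render_graph keywords
# (dir, name, prefix, attrs) are taken from the calls in this module.
def _example_render_toy_graph():
    """Sketch only: render a 3-node toy graph with one displayed attribute."""
    import networkx as nx
    import tempfile
    g = nx.DiGraph()
    g.add_edges_from([(1, 2), (2, 3)])
    for n in g.nodes:
        g.nodes[n]['num'] = n * 10  # toy attribute shown on each node
    render.render_graph(g, dir=tempfile.tempdir, name="toy", prefix="bin",
                        attrs=['num'])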
def _map_subgraph(self, input_map, btfg, stfg):
    assert isinstance(input_map, GraphMap)
    assert isinstance(self.bFlow, cf.BinaryControlFlow)
    assert isinstance(self.sFlow, cf.SourceControlFlow)
    assert isinstance(btfg, HierarchicalFlowGraph)
    assert isinstance(stfg, HierarchicalFlowGraph)
    # --
    btfg_flow = btfg.flow
    stfg_flow = stfg.flow
    assert isinstance(btfg_flow, TransformedFlowGraph)
    assert isinstance(stfg_flow, TransformedFlowGraph)
    # --
    unmapped_nodes = input_map.unmapped()
    mapped_nodes = input_map.mapped()
    log.debug("Unmapped binary nodes: {}".format(list(unmapped_nodes)))

    def Union(a, b):
        # Move elements of a to b
        node_sets[b] = node_sets[b].union(node_sets[a])
        for n in node_sets[a]:
            node_lookup[n] = b
        node_sets[a] = set()

    def Find(el):
        assert el in node_lookup
        return node_lookup[el]

    def merge_single_paths(merged_nodes, node_sets):
        """
        This algorithm ensures that fixpoints are propagated in simple paths.

        Notes:
            This must be done in order (DFS preorder) to avoid useless path
            traversals; each unmapped node is visited exactly once. Process
            the dominator tree bottom-up until a fixed point is found
            (mapped_nodes). Repeat this until all nodes from unmapped_nodes
            are processed.
        """
        nlist = SortedKeyList(
            iterable=unmapped_nodes,
            key=btfg_flow.get_dom_tree().get_preorder_number)
        for n in reversed(nlist):
            pred = list(btfg_flow.get_graph().predecessors(n))
            succ = list(btfg_flow.get_graph().successors(n))
            if len(pred) != 1 or len(succ) != 1:
                continue
            dom_pred = list(btfg_flow.get_dom_tree()._domTree.predecessors(n))
            dom_succ = list(btfg_flow.get_dom_tree()._domTree.successors(n))
            assert len(dom_pred) == 1, "Dom tree not really a tree"
            # First nodes in nlist are leaf nodes in predominator tree
            if len(dom_succ) == 0:
                # Assign to predecessor node in predom tree
                log.debug("Assigning leaf {} to {}.".format(n, dom_pred))
                # assert dom_pred[0] in mapped_nodes -> not needed.
                Union(n, dom_pred[0])
                merged_nodes.add(n)
                continue
            else:
                # Not a leaf node in predominator tree
                assert len(dom_succ) == 1, "Invalid dominator tree."
                last_succ = Find(dom_succ[0])  # prefer downwards
                if last_succ in mapped_nodes:
                    # Assign this set to last_succ
                    log.debug("Assigned {} downwards to fixed point {}."
                              .format(n, last_succ))
                    Union(n, last_succ)
                    merged_nodes.add(n)
                    continue
                else:
                    log.debug("Assigned {} upwards to point {}.".format(
                        n, dom_pred[0]))
                    # assert dom_pred[0] in mapped_nodes -> not needed
                    Union(n, dom_pred[0])
                    merged_nodes.add(n)
                    continue
            # --
            # noinspection PyUnreachableCode
            assert False, "should not land here"
        for n in merged_nodes:
            assert len(node_sets[n]) == 0, "Algorithm failed, must fix."

    def contract_dominator_tree(merged_nodes, node_sets):
        last_nodes = unmapped_nodes.difference(merged_nodes)
        # First pass on the remaining nodes, rule out specific cases. Head
        # nodes in simple paths are processed here.
        first_pass_merged_nodes = set()
        for n in last_nodes:
            pred = list(btfg_flow.get_graph().predecessors(n))
            succ = list(btfg_flow.get_graph().successors(n))
            # dom_pred = list(b_tfg.get_dom_tree()._domTree.predecessors(n))
            dom_succ = list(btfg_flow.get_dom_tree()._domTree.successors(n))
            if len(pred) == 1 and len(succ) == 1:
                assert False, "Something missing in the previous step, fix needed..."
            # Node with multiple in-edges and a single out-edge: this node
            # dominates the successor in domTree. Merge to the fixed point
            # downwards if the fixed point is already a mapped node, or was
            # mapped in the previous step.
            if len(pred) > 1 and len(succ) == 1:
                if len(dom_succ) != 1:
                    continue
                # Check if node was merged in the previous step
                last_succ = Find(succ[0])
                if last_succ in mapped_nodes:
                    # Assign this node to last_succ
                    log.debug("Assigned node {} with mult_in_edg and "
                              "single out_edg to {}.".format(n, last_succ))
                    Union(n, last_succ)
                    first_pass_merged_nodes.add(n)
        merged_nodes = merged_nodes.union(first_pass_merged_nodes)
        last_nodes = last_nodes.difference(first_pass_merged_nodes)
        # Second pass on the remaining nodes, perform generic contraction.
        # Either structurally, cutting the connected subgraphs of uncolored
        # nodes, or process nodes iteratively bottom-up in the dominator
        # tree. Uncolored nodes here are always assigned upwards in the
        # dominator tree.
        nlist = SortedKeyList(
            iterable=last_nodes,
            key=btfg_flow.get_dom_tree().get_preorder_number)
        for n in reversed(nlist):
            # Pred is already checked to have length 1, domTree is valid
            dom_pred = list(btfg_flow.get_dom_tree()._domTree.predecessors(n))
            last_pred = Find(dom_pred[0])
            if last_pred in mapped_nodes:
                # Already found a fixed point (simple path)
                log.debug("[2nd pass] Assigning {} to fixed node {}".format(
                    n, last_pred))
                Union(n, last_pred)
                merged_nodes.add(n)
            else:
                # Assign upwards
                log.debug("[2nd pass] Assigning {} upwards to node {}".format(
                    n, last_pred))
                Union(n, last_pred)
                merged_nodes.add(n)
        for n in last_nodes:
            assert len(node_sets[n]) == 0, "Algorithm failed, must fix."
        # FIXME: if execution order shall be maintained, we must preserve
        # ctrl dep as well.
        # Yet another sanity check
        for n in btfg_flow.get_graph().nodes:
            m = Find(n)
            if m not in mapped_nodes:
                assert False, \
                    "Not all unmapped nodes were assigned to mapped points.\n" \
                    "Unmapped node {}.\nNode Sets: {}".format(m, node_sets)

    log.info("Completing mapping of {}".format(btfg.name))
    if self.do_render:
        eid = btfg_flow.get_entry_id()
        render.render_graph(btfg_flow.get_dom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=self.bFlow.name + '_domTree_' + str(eid),
                            prefix="bin", topnode=eid)

    node_sets = {n: {n} for n in btfg_flow.get_graph().nodes}
    node_lookup = {n: n for n in btfg_flow.get_graph().nodes}
    merged_nodes = set()
    merge_single_paths(merged_nodes, node_sets)
    contract_dominator_tree(merged_nodes, node_sets)

    mapping_dict = dict()
    for n in btfg_flow.get_graph().nodes:
        matched_src_node = input_map[Find(n)]
        assert matched_src_node is not None
        mapping_dict[n] = matched_src_node
    # --
    smap = GraphMap(btfg_flow, stfg_flow, mapping_dict, "overapprox.map",
                    predecessors=[input_map])
    report = dict(comment="no report implemented")  # FIXME: write a report of stuff done
    return smap, report
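
# The Union/Find pair above is a minimal disjoint-set: every node starts in a
# singleton set, and lumping a node moves its whole set into the set of its
# eventual fixed point. A self-contained sketch of the same mechanics on a toy
# path a->b->c where only 'a' is mapped (names are illustrative only):
def _example_union_find_lumping():
    """Sketch only: lump unmapped path nodes 'b', 'c' into mapped node 'a'."""
    node_sets = {n: {n} for n in 'abc'}
    node_lookup = {n: n for n in 'abc'}

    def union(a, b):
        # move elements of set a into set b
        node_sets[b] = node_sets[b].union(node_sets[a])
        for n in node_sets[a]:
            node_lookup[n] = b
        node_sets[a] = set()

    def find(el):
        return node_lookup[el]

    union('c', 'b')  # c joins b ...
    union('b', 'a')  # ... and b (now {b, c}) joins the fixed point a
    assert find('c') == 'a' and node_sets['a'] == {'a', 'b', 'c'}
    return node_sets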
def _map_subgraph(self, input_map, btfg, stfg):
    assert isinstance(btfg, HierarchicalFlowGraph)
    assert isinstance(stfg, HierarchicalFlowGraph)

    def get_original_loop_id(tfg, regionId):
        assert isinstance(tfg, transformer.TransformedFlowGraph)
        # --
        # Get region collection
        tfg_regions = tfg.get_region_collection()
        assert regionId in tfg_regions._loopRegions
        # All nodes represent region IDs, the new ones must be reduced loops
        l_region = tfg_regions.get_region(regionId)
        assert l_region is not None, "Invalid region id."
        l_transf = l_region.get_transf()
        # --
        assert isinstance(l_transf, transformation.ReducedLoopTransf)
        return l_transf.get_header_node()

    def compute_ctrldep_map():
        """
        Map nodes by matching their control-dependency signatures
        :returns GraphMap
        """

        def get_bb_ctrlprops(flow, tfg, ctrldep):
            """label dependent BBs with the labels of their immediately
            controlling edges
            :returns dict(node in tfg: frozenset of controlling edge labels)
            """
            g = tfg.flow.get_graph()
            node2edges = dict()  # nodes -> edge labels
            for e, controlled_nodes in ctrldep.iteritems():
                if e in g.edges:
                    for c in controlled_nodes:
                        lbl = flow.digraph.edges[e]['label']  # must not fail
                        if c not in node2edges:
                            node2edges[c] = set()
                        node2edges[c].add(lbl)
            # -- make them hashable
            node2cond = {k: frozenset(v) for k, v in node2edges.iteritems()}
            return node2cond

        def get_subgraph_ctrldeps():
            """filter flow-wide deps to only hold edges and nodes of this
            subgraph, and also remove self-deps of loop headers"""

            def get_and_filter_subgraph(tfg, flow):
                ctrldep = flow.get_control_dependencies()
                g = tfg.flow.get_graph()
                ctrldep_here = {
                    k: set(filter(lambda x: x in g.nodes and x != tfg.loop_id, v))
                    for k, v in ctrldep.iteritems() if k in g.edges
                }
                return ctrldep_here

            deps_bin = get_and_filter_subgraph(btfg, self.bFlow)
            deps_src = get_and_filter_subgraph(stfg, self.sFlow)
            return deps_bin, deps_src

        f_map = dict()
        f_map.update(fixed_points)
        log.info("Running ctrl-dep mapping on '{}'".format(btfg.name))
        log.debug("Fixed points={}".format(f_map.items()))

        #####################
        # ctrldep properties
        #####################
        ctrldep_bin, ctrldep_src = get_subgraph_ctrldeps()
        report["control-dependency"] = dict(
            bin={str(k): str(list(v)) for k, v in ctrldep_bin.iteritems()},
            src={str(k): str(list(v)) for k, v in ctrldep_src.iteritems()})
        bnode2ctrl = get_bb_ctrlprops(self.bFlow, btfg, ctrldep_bin)
        snode2ctrl = get_bb_ctrlprops(self.sFlow, stfg, ctrldep_src)
        report['node-ctrl-props'] = dict(
            bin={k: " || ".join(list(v)) for k, v in bnode2ctrl.iteritems()},
            src={k: " || ".join(list(v)) for k, v in snode2ctrl.iteritems()})

        ###########
        # matching
        ###########
        # invert the source-node map to match against it
        rev = dict()
        for node, cond in snode2ctrl.iteritems():
            if cond not in rev:
                rev[cond] = {node}
            else:
                rev[cond].add(node)
        for bb, cond in bnode2ctrl.iteritems():
            snodes = rev.get(cond, set())
            if snodes:
                log.debug("{}: bin-{} maps to src nodes: {}".format(
                    btfg.name, bb, snodes))
                # If there are multiple src BBs, we can annotate any of them;
                # however, some of them are loop headers. Do not annotate
                # there.
                found = False
                for sn in snodes:
                    # FIXME: see paper whether they have addressed that one.
                    loc = self.sFlow.get_line_info(sn)
                    if 'min' in loc and loc['min'].get('l', 0) > 0:
                        f_map[bb] = sn
                        found = True
                        break
                if not found:
                    log.debug("{} No valid src locations for {}".format(
                        btfg.name, bb))
            else:
                log.debug("{}: no match for bin-{}, cond={}".format(
                    btfg.name, bb, cond))
        # --
        g = GraphMap(gA=btfg.flow, gB=stfg.flow, dict_map=f_map,
                     name="ctrl dependency")
        return g

    report = dict()
    flag_isCondensed = btfg.parent is None and stfg.parent is None

    ################
    # Render graphs
    ################
    if self.do_render:
        if flag_isCondensed:
            render.render_graph(btfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=btfg.name + "_allreduced", prefix="bin")
            render.render_graph(stfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=stfg.name + "_allreduced", prefix="src")
        else:
            render.render_graph(btfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=btfg.name, prefix="sub")
            render.render_graph(stfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=stfg.name, prefix="sub")

    ###############
    # Fixed-points
    ###############
    # 0.a Find new nodes, save original loop headers as fixed points
    fixed_points = dict()
    nodes_new_b = {n: get_original_loop_id(btfg.flow, n)
                   for n in btfg.flow.get_graph().nodes
                   if n > self.bFlow.get_max_id()}
    nodes_new_s = {n: get_original_loop_id(stfg.flow, n)
                   for n in stfg.flow.get_graph().nodes
                   if n > self.sFlow.get_max_id()}
    b_regions = btfg.flow.get_region_collection()
    s_regions = stfg.flow.get_region_collection()
    for n in nodes_new_b.keys():
        if not b_regions.is_loop_region_matched(n):
            continue
        # Get corresponding new source loop region id
        needed_sid = nodes_new_b[n]
        sub_b = btfg.find(needed_sid)
        partner_s = sub_b.partner
        assert partner_s is not None
        l_source_origin_id = partner_s.loop_id
        l_source_region_id = s_regions.get_loop_region_id(l_source_origin_id)
        assert l_source_region_id in nodes_new_s.keys()
        # Add fixed point
        fixed_points[n] = l_source_region_id
    # 0.b Add entry and exit node as fixed points for the condensed flow.
    if flag_isCondensed:
        assert btfg.flow.is_subflow() is not True, "Not a condensed flow."
        assert stfg.flow.is_subflow() is not True, "Not a condensed flow."
        fixed_points[btfg.flow.get_orig_flow_entry_id()] = \
            stfg.flow.get_orig_flow_entry_id()
        fixed_points[btfg.flow.get_orig_flow_exit_id()] = \
            stfg.flow.get_orig_flow_exit_id()
    # 0.c Add entry node of reduced flow if not condensed
    if not flag_isCondensed:
        assert btfg.flow.is_subflow() is True
        assert stfg.flow.is_subflow() is True
        # b_loop_id and s_loop_id are entry node ids
        fixed_points[btfg.loop_id] = stfg.loop_id
    log.debug("Fixed points: {}".format(fixed_points))

    ##########
    # Mapping
    ##########
    h_map = compute_ctrldep_map()
    report['ctrlDepMap'] = h_map
    report['trust-dbg-info'] = self.trust_dbg_columns
    # --
    return h_map, report
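
# The matching above boils down to: give every node a hashable signature (the
# frozenset of its controlling edge labels), invert the source-side dict, and
# look each binary signature up in the inverse. A self-contained sketch with
# toy node ids and condition labels (all names illustrative):
def _example_ctrldep_signature_match():
    """Sketch only: match nodes purely by identical condition sets."""
    bnode2ctrl = {10: frozenset(['c1']), 11: frozenset(['c1', 'c2'])}
    snode2ctrl = {3: frozenset(['c1']), 7: frozenset(['c1', 'c2'])}
    rev = {}
    for node, cond in snode2ctrl.items():
        rev.setdefault(cond, set()).add(node)
    matches = {bb: rev.get(cond, set()) for bb, cond in bnode2ctrl.items()}
    assert matches == {10: {3}, 11: {7}}
    return matches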
def map_flows(bFlow, sFlow, mapper_name, hom_order, extLoopInfo=None,
              do_render=False, trust_dbg=False):
    """
    Establish a mapping between a pair of source and binary CFGs.
    Returns a hierarchical graph map that maps bin to source.
    FIXME: generalize hom_order into mapper arguments.
    """

    def report_tfg(report_dic, tfg, ident):
        """write some details about a transformed flow graph to the report"""
        assert ident not in report_dic, "Duplicate transformed flow graph."
        # --
        nodes_to_scan = [n for n in tfg.get_graph().nodes
                         if n <= tfg._c_flow._maxId]
        fcall_dict = {n: tfg._c_flow.get_func_calls(n) for n in nodes_to_scan}
        report_dic[ident] = {
            "edges": list(tfg.get_graph().edges),
            "nodes": list(tfg.get_graph().nodes),
            "fcalls": fcall_dict
        }

    def set_hierarchy_pairs(bhf, shf, matches_s2b, extLoopInfo):
        """Indicate which subgraphs in the hierarchy are pairs, based on the
        loop matching."""

        def walk_level_b(bl):
            """mark those that shall be skipped"""
            if extLoopInfo and str(bl.loop_id) in extLoopInfo['loops']:
                # FIXME: extLoopInfo keys are str
                bl.skip = True
                return
            for sub_bhf in bl.subflows:
                walk_level_b(sub_bhf)

        def walk_level_s2b(bl, sl):
            """pair all subflows at this level, and send pairs down for
            another walk"""
            bl.set_partner(sl)
            for sub_shf in sl.subflows:
                needed_bb = matches_s2b[sub_shf.loop_id]
                sub_bhf = None
                for this_sub_bhf in bl.subflows:
                    if this_sub_bhf.loop_id == needed_bb:
                        sub_bhf = this_sub_bhf
                        break
                assert sub_bhf is not None
                walk_level_s2b(sub_bhf, sub_shf)

        # --
        walk_level_b(bhf)
        walk_level_s2b(bhf, shf)

    def mark_matched_loops(matches, report_dic):
        """Mark which loops have been matched. Mainly for the report."""
        b_rcoll = b_hflow.flow.get_region_collection()
        s_rcoll = s_hflow.flow.get_region_collection()
        for s_loop, b_loop in matches.items():
            b_r_id = b_rcoll.get_loop_region_id(b_loop)
            b_rcoll.mark_loop_region_as_matched(b_r_id)
            s_r_id = s_rcoll.get_loop_region_id(s_loop)
            s_rcoll.mark_loop_region_as_matched(s_r_id)
        # add more details for skipped binary loops (their corresponding
        # binary loop region id)
        sbl = report_dic.get("skipped_bin_loops", None)
        skipped_b_loops_region_ids = dict()
        b_loops_region_collection = b_hflow.flow.get_region_collection()
        if sbl:
            for bLoop in sbl:
                skipped_b_loops_region_ids[bLoop] = \
                    b_loops_region_collection.get_loop_region_id(bLoop)
        report_dic["skipped_bin_r_ids"] = skipped_b_loops_region_ids

    def map_all(chosen_mapper):
        """
        Run a sequence of mappers and return the final mapping
        FIXME: it's a pipeline, implement it in a generic way
        """
        report["mapping_collection"].update(precise=dict(), linelump=dict(),
                                            domlump=dict(), complete=dict())
        # precise mapper:
        chosen_mapper.set_report(report["mapping_collection"]["precise"])
        pmap, b_hflow0, s_hflow0 = chosen_mapper.compute_mapping()
        pmap.consistency_check()
        # lumps some remaining nodes into their direct pre/succ:
        slmapper = StraightLineLumping(input_hmap=pmap, bFlow=bFlow,
                                       sFlow=sFlow, bhFlow=b_hflow0,
                                       shFlow=s_hflow0, do_render=False)
        slmapper.set_report(report["mapping_collection"]["linelump"])
        lmap, b_hflow1, s_hflow1 = slmapper.compute_mapping()
        lmap.consistency_check()
        lmap.add_predecessor(pmap)
        # lumps all remaining nodes into dominators:
        dlmapper = DominatorLumping(input_hmap=lmap, bFlow=bFlow, sFlow=sFlow,
                                    bhFlow=b_hflow1, shFlow=s_hflow1,
                                    do_render=do_render)
        dlmapper.set_report(report["mapping_collection"]["domlump"])
        dmap, b_hflow2, s_hflow2 = dlmapper.compute_mapping()
        dmap.consistency_check()
        dmap.add_predecessor(lmap)
        # handles skipped subflows:
        skipmapper = SkipMapper(input_hmap=dmap, bFlow=bFlow, sFlow=sFlow,
                                bhFlow=b_hflow2, shFlow=s_hflow2,
                                annot=extLoopInfo, do_render=False)
        skipmapper.set_report(report["mapping_collection"]["complete"])
        hmap, _, _ = skipmapper.compute_mapping()
        hmap.consistency_check()
        hmap.add_predecessor(dmap)
        # -- stats:
        stats = pmap.calc_statistics()  # MappingStatistics
        percent_precise = ((100. * stats.data['mapped']) / stats.data['total']
                           if stats.data['total'] > 0 else 0.)
        log.info("Function '{}': Mapped {} (sub)graphs with {} nodes, "
                 "{:.2f}% precise".format(bFlow.name, stats.data['graphs'],
                                          stats.data['total'],
                                          percent_precise))
        # --
        assert isinstance(hmap, gm.HierarchicalGraphMap)
        return hmap

    ####################
    # Initialize report
    ####################
    report = {
        "bin_func_name": bFlow.name,
        "src_func_name": sFlow.name,
        "bb_timing": bFlow._blockTimes,
        "matched_loops": dict(),
        "flows_bin": dict(),
        "flows_src": dict(),
        "mapping_collection": dict(),
        "mapping_details": dict(),
        "max_bin_id": bFlow._maxId,
        "max_src_id": sFlow._maxId
    }

    #############
    # edge match
    #############
    rpt = edge_matcher.match(bFlow=bFlow, sFlow=sFlow, do_render=do_render,
                             trust_dbg=trust_dbg)
    report['edge-matches'] = rpt

    #############################################
    # Collapse loops -> condensed/reduced graphs
    #############################################
    # A subprogram with N loops will produce N+1 tfgs (|b_l_tfgs| = N,
    # |b_tfg| = 1) in a hierarchy
    b_hflow = transformer.get_reduced_hierarchy(bFlow)
    s_hflow = transformer.get_reduced_hierarchy(sFlow)
    report_tfg(report["flows_bin"], b_hflow.flow, "all_reduced")  # report top-level
    report_tfg(report["flows_src"], s_hflow.flow, "all_reduced")
    if do_render:
        render.render_graph(bFlow.postdom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=bFlow.name + '_postDomTree', prefix="bin",
                            topnode=bFlow._exitId, attrs=['num'])
        render.render_graph(bFlow.predom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=bFlow.name + '_domTree', prefix="bin",
                            topnode=bFlow._entryId, attrs=['num'])
        render.render_graph(sFlow.predom_tree()._domTree,
                            dir=tempfile.tempdir,
                            name=sFlow.name + '_domTree', prefix="src",
                            topnode=sFlow._entryId, attrs=['num'])

    ##############################
    # Loops & hierarchical decomp
    ##############################
    # we match loops (as a whole, not their nodes) first.
    # FIXME: why independent of flow reduction?
    matched_loops, rpt = loop_matcher.match(bFlow=bFlow, sFlow=sFlow,
                                            extLoopInfo=extLoopInfo,
                                            do_render=do_render)
    mark_matched_loops(matched_loops, report_dic=rpt)
    report["matched_loops"] = rpt
    set_hierarchy_pairs(b_hflow, s_hflow, matched_loops, extLoopInfo)

    ###########
    # ctrl dep
    ###########
    def mark_ctrl_edges(g, ebunch):
        for e in ebunch:
            g.edges[e]['ctrl'] = True

    ctrl_bin = bFlow.get_control_dependencies()
    ctrl_src = sFlow.get_control_dependencies()
    mark_ctrl_edges(bFlow.digraph, ctrl_bin.keys())
    mark_ctrl_edges(sFlow.digraph, ctrl_src.keys())
    report["control-dependency"] = dict(
        bin={str(k): str(list(v)) for k, v in ctrl_bin.iteritems()},
        src={str(k): str(list(v)) for k, v in ctrl_src.iteritems()})

    ##########
    # Mapping
    ##########
    def select_mapper():
        m = None
        if mapper_name == 'homomorphism':
            m = HomomorphismMapper(input_hmap=None, sFlow=sFlow, bFlow=bFlow,
                                   bhFlow=b_hflow, shFlow=s_hflow,
                                   hom_order=hom_order, trust_dbg=trust_dbg,
                                   do_render=do_render, check_inputs=True)
        elif mapper_name == 'ctrldep':
            m = CtrlDependencyMapper(input_hmap=None, sFlow=sFlow,
                                     bFlow=bFlow, bhFlow=b_hflow,
                                     shFlow=s_hflow, trust_dbg=trust_dbg,
                                     do_render=do_render, check_inputs=True)
        return m

    mapper = select_mapper()
    if mapper is None:
        log.error("Unknown mapper: {}".format(mapper_name))
        raise ValueError("Unknown mapper: {}".format(mapper_name))
    try:
        final_mapping = map_all(mapper)
    except Exception:
        import traceback
        traceback.print_exc()
        raise
    # --
    assert isinstance(final_mapping, gm.HierarchicalGraphMap)
    return final_mapping, report
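
# The FIXME in map_all() asks for a generic pipeline. One possible shape
# (sketch only): it assumes each mapper follows the set_report() /
# compute_mapping() / consistency_check() / add_predecessor() protocol used
# above; 'stages' and the factory signature are hypothetical.
def _example_run_mapper_pipeline(first_mapper, stages, report_collection):
    """Sketch only: 'stages' is a list of (name, mapper_factory) pairs, where
    each factory takes (input_hmap, bhFlow, shFlow) and returns a mapper."""
    first_mapper.set_report(report_collection.setdefault("precise", dict()))
    hmap, bh, sh = first_mapper.compute_mapping()
    hmap.consistency_check()
    for name, factory in stages:
        mapper = factory(hmap, bh, sh)
        mapper.set_report(report_collection.setdefault(name, dict()))
        nxt, bh, sh = mapper.compute_mapping()
        nxt.consistency_check()
        nxt.add_predecessor(hmap)
        hmap = nxt
    return hmap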
def do_render_mapping(bFlow, sFlow, hierarchical_map, annot_file):
    """Render the mapping for a CFG pair."""

    def add_pre(s, p):
        return "{}{}".format(p, s)

    def copy_and_rename(gdst, gfrom, prefix, maxid):
        """copy nodes, edges and attributes from gfrom to gdst, while adding
        a prefix"""
        gdst.add_nodes_from([(add_pre(n, prefix), gfrom.nodes[n])
                             for n in gfrom.nodes
                             if n <= maxid])  # FIXME: not elegant with maxid
        gdst.add_edges_from([(add_pre(e[0], prefix), add_pre(e[1], prefix))
                             for e in gfrom.edges
                             if e[0] <= maxid and e[1] <= maxid])
        # node attrs (skip nodes beyond maxid; they were not copied above)
        for _n in gfrom.nodes:
            if _n > maxid:
                continue
            at = gfrom.nodes[_n]
            gdst.nodes[add_pre(_n, prefix)].update(at)
        # edge attrs (same filter as above)
        for _e0, _e1 in gfrom.edges:
            if _e0 > maxid or _e1 > maxid:
                continue
            at = gfrom.edges[(_e0, _e1)]
            gdst.edges[(add_pre(_e0, prefix), add_pre(_e1, prefix))].update(at)

    def build_clusters(hmap):
        """convert hierarchy into subgraph clusters for the renderer"""
        assert isinstance(hmap, gm.HierarchicalGraphMap)
        parts = hmap.name.split("|")
        if len(parts) == 2:
            cbname = parts[0]
            csname = parts[1]
        else:
            cbname = "bin_{}".format(hmap.name)
            csname = "src_{}".format(hmap.name)
        cluster_bin = render.GraphCluster(cbname)
        cluster_src = render.GraphCluster(csname)
        # children
        for c in hmap.children:
            ccb, ccs = build_clusters(c)
            if ccb is not None:
                cluster_bin.add_child(ccb)
            if ccs is not None:
                cluster_src.add_child(ccs)
        # myself
        b_nodes = hmap.mapping.mapped() | hmap.mapping.unmapped()  # bin BBs!
        if hmap.mapping.graph_B is not None:
            s_nodes = hmap.mapping.graph_B.get_graph().nodes()
        else:
            s_nodes = set()
        if s_nodes:
            cluster_src.add_nodes(map(lambda x: add_pre(x, "s"), s_nodes))
            cluster_src.topnode = add_pre(hmap.mapping.graph_B.entryId, 's')
        if b_nodes:
            cluster_bin.add_nodes(map(lambda x: add_pre(x, "b"), b_nodes))
            cluster_bin.topnode = add_pre(hmap.mapping.graph_A.entryId, 'b')
        if cluster_src.empty():
            return cluster_bin, None
        else:
            cluster_bin.add_relative(cluster_src)
            return cluster_bin, cluster_src

    def locstr(loc):
        disc = loc.get('d', None)
        col = loc.get('c', 0)
        line = loc.get('l', 0)
        s = "{}".format(line)
        if col != 0:
            s += ":{}".format(col)
        if disc is not None:
            s += " ({})".format(disc)
        return s

    def decorate():
        """add some more informative attrs to the graphs"""

        def deco(flow):
            for n in flow.digraph.nodes:
                dec = dict()
                # line info
                lid = flow.get_line_info(n)
                if lid:
                    try:
                        dec = dict(begin=locstr(lid["begin"]),
                                   end=locstr(lid["end"]))
                        dec.update(dict(min=locstr(lid["min"]),
                                        max=locstr(lid["max"])))
                    except KeyError:
                        pass
                # func calls
                fc = flow.get_func_calls(n)
                if fc:
                    dec['calls'] = str(fc)
                # timing
                if hasattr(flow, "_blockTimes"):
                    try:
                        dec['time'] = flow._blockTimes[n]
                    except KeyError:
                        pass
                # --
                flow.digraph.nodes[n].update(dec)

        deco(bFlow)
        deco(sFlow)

    decorate()

    ############################
    # copy both graphs into one
    ############################
    both_graphs = nx.DiGraph()
    both_graphs.graph.update(bFlow.digraph.graph)  # take graph attrs from bin
    copy_and_rename(both_graphs, bFlow.digraph, "b", bFlow.get_max_id())
    copy_and_rename(both_graphs, sFlow.digraph, "s", sFlow.get_max_id())

    # get cluster/subgraph hierarchy
    clusters = build_clusters(hierarchical_map)
    if not clusters[0].check_cluster():
        log.warning("Cluster bin of {} inconsistent".format(
            hierarchical_map.name))
    if clusters[1] is not None and not clusters[1].check_cluster():
        log.warning("Cluster src of {} inconsistent".format(
            hierarchical_map.name))

    # get the mapping itself
    flatmap = hierarchical_map.flatten()
    allmap = {add_pre(k, 'b'): add_pre(v, 's')
              for k, v in flatmap.get_map().iteritems()}

    # add one common entry/exit node for better visualization
    entries = ['b{}'.format(bFlow._entryId), 's{}'.format(sFlow._entryId)]
    exits = ['b{}'.format(bFlow._exitId), 's{}'.format(sFlow._exitId)]
    both_graphs.add_node("entry", shape='diamond')
    both_graphs.add_node("exit", shape='diamond')
    both_graphs.add_edges_from([('entry', n) for n in entries])
    both_graphs.add_edges_from([(n, 'exit') for n in exits])

    # mark control edges in color:
    for e in both_graphs.edges:
        if 'ctrl' in both_graphs.edges[e]:
            both_graphs.edges[e]['color'] = \
                both_graphs.edges[e]['fontcolor'] = 'red'

    ##############################
    # mark precisely mapped nodes
    ##############################
    precise_nodes = set()
    try:
        precise_map = _get_last_precise_map(hierarchical_map)
        log.info("Rendering mapping of '{}' with precise map='{}'".format(
            hierarchical_map.name, precise_map.mapping.name()))
        # flatten and colorize nodes
        pflatmap = precise_map.flatten()
        mapped_nodes = [add_pre(n, 'b') for n in pflatmap.mapped()]
        precise_nodes |= set(mapped_nodes)
        for n in mapped_nodes:
            both_graphs.nodes[n].update(dict(fillcolor='darkolivegreen1',
                                             style='filled'))
    except AssertionError:
        log.warning("cannot highlight precisely mapped nodes in mapping")

    ###################
    # add mapping edges
    ###################
    if precise_nodes:
        mapping_edges = [(k, v) for k, v in allmap.iteritems()
                         if k in precise_nodes]
    else:
        mapping_edges = [(k, v) for k, v in allmap.iteritems()]
    both_graphs.add_edges_from(mapping_edges, virtual=True, color='gray80',
                               style='dashed', constraint='False')

    # ... and finally render
    imgname = "map_" + hierarchical_map.name
    render.render_graph(G=both_graphs, dir=tempfile.tempdir, name=imgname,
                        interactive=True, topnode='entry', botnode='exit',
                        clusters=list(clusters), keepfiles=False,
                        attrs=('color', 'fillcolor', 'fontcolor', 'style',
                               'shape', 'arrowhead', 'constraint', 'begin',
                               'end', 'min', 'max', 'time', 'calls'))
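
# copy_and_rename() above is essentially a prefixed graph union. networkx can
# express the same idea with relabel_nodes() + compose(); a self-contained
# sketch on toy graphs (note it omits the maxid filtering done above):
def _example_prefixed_union():
    """Sketch only: merge two toy graphs under 'b'/'s' node prefixes."""
    import networkx as nx
    gb = nx.DiGraph([(1, 2)])
    gs = nx.DiGraph([(1, 3)])
    gb = nx.relabel_nodes(gb, {n: 'b{}'.format(n) for n in gb.nodes})
    gs = nx.relabel_nodes(gs, {n: 's{}'.format(n) for n in gs.nodes})
    both = nx.compose(gb, gs)
    assert set(both.edges) == {('b1', 'b2'), ('s1', 's3')}
    return both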
def _map_subgraph(self, input_map, btfg, stfg):
    assert isinstance(btfg, HierarchicalFlowGraph)
    assert isinstance(stfg, HierarchicalFlowGraph)

    def compute_potential_map():
        """
        Using debug info and function calls, determine a potential mapping
        bin->[srcbb]
        :returns dict(binBB -> list(srcBB))
        """

        def get_sblocks_matching_dwarflines():
            """
            Find sBBs that match each dwarf line (i.e., ~address)
            :returns tuple (map_precise, map_fallback) where
                map_precise: dw line -> src BB, considering column/discr info
                map_fallback: dw line -> src BB, considering only line numbers
            """

            def verbose_unique():
                """just debug output"""
                nodes_unq_dwlines = self.bFlow.get_unique_dw_lines(
                    nodes_b)  # line -> unique BB
                log.debug("Unique dwlines:")
                for dwl_i, node in nodes_unq_dwlines.items():
                    log.debug("Found in node {}, dwl={}".format(
                        node,
                        self.bFlow._dwData._dwData['LineInfoEntries'][str(dwl_i)]))
                log.debug("")

            # verbose_unique()

            # LUT
            allDwLines = dict()
            for n in nodes_b:
                dwLines = self.bFlow._dwData.get_dw_lines(
                    self.bFlow.get_addr_ranges(n))
                allDwLines.update(dwLines)
            # generate precise map (line+col/discr; known to be unreliable
            # with gcc)
            dw2src_map = dict()
            if self.trust_dbg_columns:
                haveCol = False
                for key, dwLine in allDwLines.items():
                    line = dwLine['LineNumber']
                    column = dwLine['LineOffset']
                    dw2src_map[key] = self.sFlow.find_source_block(
                        line, column, nodes_s)
                    haveCol = haveCol or (column != 0)
                if not haveCol:
                    log.warning("No column numbers in debug info. "
                                "Turn on to improve mapping.")
            # generate fallback map (only by line number)
            lines = {dw['LineNumber'] for k, dw in allDwLines.items()}
            srcline2sbb = self.sFlow.find_source_blocks_line_only(
                lines, nodes_s)
            dw2src_map_line_only = {
                key: srcline2sbb[dw['LineNumber']]
                for key, dw in allDwLines.items()
            }
            # --
            # maps contain ALL sBBs for those bBBs which have no debug info
            return dw2src_map, dw2src_map_line_only

        def add_refs_by_location():
            """for one bin-BB 'n', append potential src equivalents to the
            set p_b"""
            dwLines = self.bFlow._dwData.get_dw_lines(
                self.bFlow.get_addr_ranges(n))
            for key, dwLine in dwLines.items():
                mapped_source_block = dw2src_map_precise.get(key, None)
                if mapped_source_block is None:
                    mapped_blocks = dw2src_map_fallback[key]
                    log.debug("dwline with key {} in block {} has following "
                              "matching blocks (line only): {}".format(
                                  key, n, mapped_blocks))
                    for b in mapped_blocks:
                        p_b.add(b)
                else:
                    p_b.add(mapped_source_block)

        def add_refs_by_fcalls():
            b_fcalls = self.bFlow.get_func_calls(n)
            for f in b_fcalls:
                if f not in s_funccalls_inv:
                    continue
                for source_node in s_funccalls_inv[f]:
                    if source_node in nodes_s:
                        p_b.add(source_node)
                        log.debug("*********---- Added s_node fcall "
                                  "reference: {}".format(source_node))

        def add_refs_by_varaccess():
            # FIXME: implement matching by accessed variables
            pass

        # get potential maps: addr -> src-BBs
        dw2src_map_precise, dw2src_map_fallback = \
            get_sblocks_matching_dwarflines()
        # generate self-sorting list of potential src nodes for each bin node
        ret_map_bin2src = dict()
        for n in nodes_b:
            if self.hom_order_src == 'predominator-first':
                # noinspection PyArgumentList
                p_b = SortedSet(
                    key=self.sFlow.predom_tree().get_preorder_number)
            elif self.hom_order_src == 'postdominator-first':
                # noinspection PyArgumentList
                p_b = SortedSet(
                    key=self.sFlow.postdom_tree().get_preorder_number)
            elif self.hom_order_src == 'predominated-first':
                # noinspection PyArgumentList
                p_b = SortedSet(
                    key=lambda x: -self.sFlow.predom_tree().get_preorder_number(x))
            elif self.hom_order_src == 'postdominated-first':
                # noinspection PyArgumentList
                p_b = SortedSet(
                    key=lambda x: -self.sFlow.postdom_tree().get_preorder_number(x))
            else:
                assert False, "Invalid argument (self.hom_order_src)."
            # fill the list:
            add_refs_by_location()
            add_refs_by_fcalls()
            add_refs_by_varaccess()
            ret_map_bin2src[n] = p_b
        # --
        return ret_map_bin2src  # bin node -> potential src nodes (SortedSet)

    def get_original_loop_id(tfg, regionId):
        assert isinstance(tfg, transformer.TransformedFlowGraph)
        # --
        # Get region collection
        tfg_regions = tfg.get_region_collection()
        assert regionId in tfg_regions._loopRegions
        # All nodes represent region IDs, the new ones must be reduced loops
        l_region = tfg_regions.get_region(regionId)
        assert l_region is not None, "Invalid region id."
        l_transf = l_region.get_transf()
        # --
        assert isinstance(l_transf, transformation.ReducedLoopTransf)
        return l_transf.get_header_node()

    def compute_dom_homomorphic_map():
        """
        filter map using dominator homomorphism
        :returns GraphMap
        """

        def translate_id(node_id, isBinary):
            """some IDs are newly inserted for collapsed graphs and do not
            exist in the original flow graph -- translate them to their
            original equivalent"""
            if isBinary:
                if node_id > self.bFlow.get_max_id():
                    return nodes_new_b[node_id]
            else:
                if node_id > self.sFlow.get_max_id():
                    return nodes_new_s[node_id]
            return node_id

        def test_homomorphism(binary_nodes):
            """Check whether the mapping is valid so far"""
            failed_count = 0
            for b in binary_nodes:
                for b_ in binary_nodes:
                    if b_ == b:
                        continue
                    a = f_map.get(b, None)
                    a_ = f_map.get(b_, None)
                    if a is None or a_ is None:
                        continue
                    # Get original IDs for dominance check
                    og_b = translate_id(b, True)
                    og_b_ = translate_id(b_, True)
                    og_a = translate_id(a, False)
                    og_a_ = translate_id(a_, False)
                    log.debug("b,b_={},{}; a,a_={},{}".format(b, b_, a, a_))
                    log.debug("og_b,og_b_={},{}; og_a,og_a_={},{}".format(
                        og_b, og_b_, og_a, og_a_))
                    if self.bFlow.predom_tree().test_dominance(og_b, og_b_) != \
                            self.sFlow.predom_tree().test_dominance(og_a, og_a_) or \
                            self.bFlow.predom_tree().test_dominance(og_b_, og_b) != \
                            self.sFlow.predom_tree().test_dominance(og_a_, og_a):
                        log.debug("bin_dominance={}, src_dominance={}".format(
                            self.bFlow.predom_tree().test_dominance(og_b, og_b_),
                            self.sFlow.predom_tree().test_dominance(og_a, og_a_)))
                        log.debug("Preorder numbers og_b,og_b_: {},{}".format(
                            self.bFlow.predom_tree().get_preorder_number(og_b),
                            self.bFlow.predom_tree().get_preorder_number(og_b_)))
                        log.debug("Preorder numbers og_a,og_a_: {},{}".format(
                            self.sFlow.predom_tree().get_preorder_number(og_a),
                            self.sFlow.predom_tree().get_preorder_number(og_a_)))
                        add_back_to_worklist(b)
                        add_back_to_worklist(b_)
                        failed_count += 1
                        log.debug("Homomorphism failed")
            return failed_count

        def add_back_to_worklist(b):
            if b in fixed_points:
                return
            worklist.add(b)
            f_map[b] = None

        def check_conflict(r, b):
            """check if src-bb r is known to be a bad choice for bin-bb b,
            given the current state of the mapping."""
            if r not in f_confl[b]:
                return False
            # see if any of the known conflicts are already in the map
            hasConflict = False
            for b_, r_ in f_confl[b][r]:
                if f_map.get(b_, None) == r_:  # is the conflicting one in the map?
                    log.debug("conflict: {}->{} not allowed because {}->{} "
                              "in mapping".format(b, r, b_, r_))
                    hasConflict = True
                    break
            return hasConflict

        def select_reference(b):
            """Among possible references, return the first non-conflicting
            one"""
            p_b = potential_map_bin2src[b]
            for r in p_b:
                if not check_conflict(r, b):
                    return r
            return None

        def add_conflict(b, a, b_, a_):
            """
            Store that b->a and b'->a' are conflicting decisions
            b* = binary, a* = source
            """
            if a not in f_confl[b]:
                f_confl[b][a] = set()
            if a_ not in f_confl[b_]:
                f_confl[b_][a_] = set()
            f_confl[b][a].add((b_, a_))  # b->a conflicts with b'->a'
            f_confl[b_][a_].add((b, a))  # b'->a' conflicts with b->a
            log.debug("{}->{} conflicts with {}->{}".format(b, a, b_, a_))

        def remove_ambiguous():
            """Remove all entries from f_map where we could have confused
            siblings"""

            def do_level(node):
                """Dive down the dom tree, and check for ambiguity at each
                level"""
                mapped_by = dict()  # src-bb -> bin-bbs in this btfg
                for ch in pdt.successors(node):
                    # if it has children, their dom. relationships will make
                    # it unambiguous.
                    if ch in f_map and pdt.out_degree(ch) == 0:
                        srcbbs = potential_map_bin2src[ch]
                        for sbb in srcbbs:
                            if sbb not in mapped_by:
                                mapped_by[sbb] = set()
                            mapped_by[sbb].add(ch)
                # remove those which have multiple src locations
                delbb = {bb for _, bbb in mapped_by.iteritems()
                         if len(bbb) > 1 for bb in bbb}
                if delbb:
                    ambiguous_bbb.update(delbb)
                    for db in delbb:
                        del f_map[db]
                # dive down
                for ch in pdt.successors(node):
                    do_level(ch)

            ambiguous_bbb = set()
            pdt = self.bFlow.predom_tree().get_tree()
            do_level(self.bFlow.predom_tree().get_root())
            # --
            return ambiguous_bbb

        log.info("Running dominator homomorphism mapping on '{}', order: {}"
                 .format(btfg.name, self.hom_order))
        if self.hom_order == 'predominated-first':
            worklist = SortedKeyList(
                iterable=nodes_b,
                key=lambda x: -self.bFlow.predom_tree().get_preorder_number(x))
        elif self.hom_order == 'postdominated-first':
            worklist = SortedKeyList(
                iterable=nodes_b,
                key=lambda x: -self.bFlow.postdom_tree().get_preorder_number(x))
        elif self.hom_order == 'predominator-first':
            worklist = SortedKeyList(
                iterable=nodes_b,
                key=self.bFlow.predom_tree().get_preorder_number)
        elif self.hom_order == 'postdominator-first':
            worklist = SortedKeyList(
                iterable=nodes_b,
                key=self.bFlow.postdom_tree().get_preorder_number)
        else:
            assert False, "Invalid argument (self.hom_order)."

        # Add known relations between entry and exit nodes of subgraphs &
        # test for safety
        f_map = dict()
        f_map.update(fixed_points)
        log.debug("Fixed points={}".format(f_map.items()))
        assert test_homomorphism(f_map.keys()) == 0, \
            "Initial homomorphism test failed for fixed points."
        f_confl = {n: dict() for n in nodes_b}
        f_confl.update({n: dict() for n in fixed_points.keys()})
        log.debug("Initial worklist={}".format(worklist))
        rounds = 0
        while len(worklist) > 0:
            rounds += 1
            # Select non-conflicting elements for all in worklist
            for _ in range(len(worklist)):
                if self.hom_order == 'pre':
                    # using preDom, matching bin dominated (body) first
                    b = worklist.pop(-1)
                else:
                    # using postDom, matching bin dominator (header) first
                    b = worklist.pop(0)
                log.debug("Current worklist element: {}".format(b))
                if b in fixed_points.keys():
                    continue  # don't touch
                a = select_reference(b)  # multiple b's might pull the same a here.
                if a is None:
                    log.debug("Only conflicting references for {} left..."
                              .format(b))
                    continue
                else:
                    f_map[b] = a
                if not self.quick:
                    # avoids spurious conflicts, but is at least O(n^3)
                    break
            # Test for homomorphism and reject those violating it
            rejected = False
            test_nodes = {k for k, v in f_map.iteritems()
                          if v is not None}  # was: nodes_b
            for b in test_nodes:  # reversing improves run-time (heuristic)
                for b_ in test_nodes:
                    if b_ == b:
                        continue
                    a = f_map.get(b, None)
                    a_ = f_map.get(b_, None)
                    if a is None or a_ is None:
                        # could still be None if we removed it
                        continue
                    # FIXME: could cache the following
                    fwd_fail = self.bFlow.predom_tree().test_dominance(
                        translate_id(b, True), translate_id(b_, True)) != \
                        self.sFlow.predom_tree().test_dominance(
                            translate_id(a, False), translate_id(a_, False))
                    rev_fail = self.bFlow.predom_tree().test_dominance(
                        translate_id(b_, True), translate_id(b, True)) != \
                        self.sFlow.predom_tree().test_dominance(
                            translate_id(a_, False), translate_id(a, False))
                    if fwd_fail or rev_fail:
                        log.debug("Dominance check failed: b,a=({},{}) ; "
                                  "b_,a_=({},{})".format(b, a, b_, a_) +
                                  ". Fail type: {}".format(
                                      'both' if fwd_fail and rev_fail else
                                      ('fwd' if fwd_fail else 'rev')))
                        add_conflict(b, a, b_, a_)
                        add_back_to_worklist(b)  # and remove from map
                        add_back_to_worklist(b_)
                        rejected = True
            if not rejected:
                log.debug("Nothing was rejected by homomorphism")
            log.debug("Map after {} rounds: {}".format(
                rounds,
                {k: v for k, v in f_map.iteritems() if v is not None}))
        log.debug("Homomorphism mapper finished on {} after {} rounds".format(
            btfg.name, rounds))
        # some indistinguishable BBs might have been mapped. Remove them to
        # prevent switching some.
        rem_bbs = remove_ambiguous()
        if rem_bbs:
            log.info("{}: Removed {} ambiguous map entries: {}".format(
                btfg.name, len(rem_bbs), rem_bbs))
            report['ambiguous-bin'] = rem_bbs
        # --
        g = GraphMap(gA=btfg.flow, gB=stfg.flow, dict_map=f_map,
                     name="dominator homomorphism")
        return g

    report = dict()
    flag_isCondensed = btfg.parent is None and stfg.parent is None

    ################
    # Render graphs
    ################
    if self.do_render:
        if flag_isCondensed:
            render.render_graph(btfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=btfg.name + "_allreduced", prefix="bin")
            render.render_graph(stfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=stfg.name + "_allreduced", prefix="src")
        else:
            render.render_graph(btfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=btfg.name, prefix="sub")
            render.render_graph(stfg.flow.get_graph(), dir=tempfile.tempdir,
                                name=stfg.name, prefix="sub")

    ###############
    # Fixed-points
    ###############
    # 0.a Find new nodes, save original loop headers as fixed points
    nodes_b = [n for n in btfg.flow.get_graph().nodes
               if n <= self.bFlow.get_max_id()]
    nodes_s = [n for n in stfg.flow.get_graph().nodes
               if n <= self.sFlow.get_max_id()]
    fixed_points = dict()
    nodes_new_b = {n: get_original_loop_id(btfg.flow, n)
                   for n in btfg.flow.get_graph().nodes
                   if n > self.bFlow.get_max_id()}
    nodes_new_s = {n: get_original_loop_id(stfg.flow, n)
                   for n in stfg.flow.get_graph().nodes
                   if n > self.sFlow.get_max_id()}
    b_regions = btfg.flow.get_region_collection()
    s_regions = stfg.flow.get_region_collection()
    for n in nodes_new_b.keys():
        if not b_regions.is_loop_region_matched(n):
            continue
        # Get corresponding new source loop region id
        needed_sid = nodes_new_b[n]
        sub_b = btfg.find(needed_sid)
        partner_s = sub_b.partner
        assert partner_s is not None
        l_source_origin_id = partner_s.loop_id
        l_source_region_id = s_regions.get_loop_region_id(l_source_origin_id)
        assert l_source_region_id in nodes_new_s.keys()
        # Add fixed point
        fixed_points[n] = l_source_region_id
    # 0.b Add entry and exit node as fixed points for the condensed flow.
    if flag_isCondensed:
        assert btfg.flow.is_subflow() is not True, "Not a condensed flow."
        assert stfg.flow.is_subflow() is not True, "Not a condensed flow."
        fixed_points[btfg.flow.get_orig_flow_entry_id()] = \
            stfg.flow.get_orig_flow_entry_id()
        fixed_points[btfg.flow.get_orig_flow_exit_id()] = \
            stfg.flow.get_orig_flow_exit_id()
    # 0.c Add entry node of reduced flow if not condensed
    if not flag_isCondensed:
        assert btfg.flow.is_subflow() is True
        assert stfg.flow.is_subflow() is True
        # b_loop_id and s_loop_id are entry node ids
        fixed_points[btfg.loop_id] = stfg.loop_id
    log.debug("Fixed points: {}".format(fixed_points))

    # Build inverse map for function calls found in source flow graph.
    s_funccalls_inv = dict()
    for n in nodes_s:
        fcalls = self.sFlow.get_func_calls(n)
        for f in fcalls:
            el = s_funccalls_inv.get(f, None)
            if el is None:
                s_funccalls_inv[f] = {n}
                continue
            s_funccalls_inv[f].add(n)

    #########################################
    # find potential map (based on dbg info)
    #########################################
    # FIXME: Process fcalls and variable accesses after initial homomorphism
    # mapping?
    potential_map_bin2src = compute_potential_map()
    report['dbgMap'] = {k: list(v) for k, v in potential_map_bin2src.items()}

    # Source line info
    log.debug("Source line info:")
    for n in nodes_s:
        log.debug("Node {}, lInfo={}".format(n, self.sFlow.get_line_info(n)))

    #########################
    # Filter by homomorphism
    #########################
    h_map = compute_dom_homomorphic_map()
    report['domHomomorphMap'] = h_map
    report['trust-dbg-info'] = self.trust_dbg_columns
    # --
    return h_map, report
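
# The homomorphism test above rejects a candidate pair (b->a, b_->a_) whenever
# the dominance relation between b and b_ differs from that between a and a_.
# A self-contained sketch of that criterion on two toy dominator trees, using
# tree reachability as a stand-in for test_dominance (all names illustrative):
def _example_dominance_homomorphism():
    """Sketch only: check one candidate pair against both dominator trees."""
    import networkx as nx
    bin_dom = nx.DiGraph([(1, 2), (2, 3)])      # 1 dominates 2 dominates 3
    src_dom = nx.DiGraph([(10, 20), (10, 30)])  # 20 and 30 are siblings

    def dominates(tree, x, y):
        return x == y or y in nx.descendants(tree, x)

    f_map = {2: 20, 3: 30}  # candidate mapping: bin -> src
    b, b_ = 2, 3
    a, a_ = f_map[b], f_map[b_]
    ok = (dominates(bin_dom, b, b_) == dominates(src_dom, a, a_) and
          dominates(bin_dom, b_, b) == dominates(src_dom, a_, a))
    assert not ok  # 2 dominates 3, but 20 does not dominate 30 -> reject
    return ok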
from flow import render
# NOTE: assumed import path; DwarfData is used below, and this guesses that
# the dwarf module lives next to 'render'. Adjust if the actual location
# differs.
from flow import dwarf
import logging
import coloredlogs

# Set up logging
logging.basicConfig()
coloredlogs.install(level='DEBUG',
                    fmt='[%(levelname)s] <%(name)s> %(message)s')
log = logging.getLogger()

# Main
dwData = dwarf.DwarfData('./test/benchmarks/maxleaf/debug.json')

# Render subprogram tree
subprogTreeNodes = [
    node for node in dwData._dieTree.nodes
    if dwData._dieTree.nodes[node]['tag'] in
    ['DW_TAG_compile_unit', 'DW_TAG_subprogram',
     'DW_TAG_inlined_subroutine', 'DW_TAG_lexical_block']
]
subprogTreeNodes += [0]
finalNodes = [0]
subprogTree = dwData._dieTree.subgraph(subprogTreeNodes)
for node in subprogTree.nodes:
    die = subprogTree.nodes[node]
    # skip compile units that contain no subprograms
    if die['tag'] == 'DW_TAG_compile_unit' and subprogTree.out_degree(node) == 0:
        continue
    finalNodes.append(node)
finalTree = subprogTree.subgraph(finalNodes)
render.render_graph(finalTree, name='subprog_tree', attrs=['tag', 'attrs'])
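
# Caveat worth noting for the subgraph() calls above: in networkx >= 2.0,
# Graph.subgraph() returns a read-only *view* onto the parent graph, which is
# fine for render-only use, but copy() is needed before mutating. A tiny
# self-contained illustration (toy graph, sketch only):
def _example_subgraph_is_view():
    import networkx as nx
    g = nx.DiGraph([(0, 1), (1, 2)])
    sub = g.subgraph([0, 1])             # frozen view (nx >= 2.0)
    assert set(sub.edges) == {(0, 1)}
    sub_mut = g.subgraph([0, 1]).copy()  # independent, mutable copy
    sub_mut.add_edge(1, 0)
    assert (1, 0) not in g.edges         # parent unchanged
    return sub_mut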
def _match_loop_trees():
    """
    Returns potential matches of loops between binary and source control
    flows.

    Args
        bFlow: Binary flow object (from enclosing scope).
        sFlow: Source flow object (from enclosing scope).

    Return
        Dictionary mapping bFlow loop nodes to sFlow loop nodes.
    """
    assert isinstance(bFlow, fparser.control_flow.BinaryControlFlow)
    assert isinstance(sFlow, fparser.control_flow.SourceControlFlow)

    def find_unq_dw(bFlow):
        """
        Find unique dwarf lines for each loop. If one line is ref'd at
        multiple levels, then the innermost wins (process innermost nesting
        first). See THESIS-Sect. 5.2.1.
        """
        assert isinstance(bFlow, fparser.control_flow.BinaryControlFlow)
        lInfo = bFlow.get_loop_info()
        # Discard root node, sort loop tree nodes according to their reversed
        # preorder number
        sorted_plist = [(lInfo.get_preorder_number(n), n)
                        for n in lInfo._lTree.nodes if n != lInfo._rootId]
        sorted_plist = sorted(sorted_plist, reverse=True,
                              key=lambda tup: tup[0])
        dwUnqMap = {}
        dwLinesAll = {}
        blockKeys = {}
        processedKeys = set()
        for _, n in sorted_plist:
            # log.debug("Finding unique dwLines for loop header {}...".format(n))
            # Get loop nodes
            ln = {n}
            bn = lInfo.get_body_nodes(n)
            if bn is not None:
                ln = ln.union(bn)
            new_keys_all = set()
            for b in ln:
                # log.debug("Processing block {}...".format(b))
                ranges = bFlow.get_addr_ranges(b)
                dwLines = bFlow._dwData.get_dw_lines(ranges)
                dwLinesAll.update(dwLines)
                blockKeys[b] = set(dwLines.keys())
                # for k, l in dwLines.items():
                #     log.debug("DwLine({})={}".format(k, l))
                new_keys = set(dwLines.keys()) - processedKeys
                for k in new_keys:
                    processedKeys.add(k)
                    # log.debug("Unique key {}".format(k))
                new_keys_all = new_keys_all.union(new_keys)
            dwUnqMap.update({n: new_keys_all})
        # Print unique dwarf line map
        for k, v in dwUnqMap.items():
            log.debug("Printing unique dwLines for loop block {}:".format(k))
            for l in v:
                log.debug("dwLine({}): {}".format(l, dwLinesAll[l]))
            log.debug("")
        # --
        return dwUnqMap, dwLinesAll

    def get_sorted_plist(flow):
        assert isinstance(flow, fparser.control_flow.ControlFlow)
        lInfo = flow.get_loop_info()
        # Discard root node, sort loop tree nodes according to their preorder
        # number
        sorted_plist = [(lInfo.get_preorder_number(n), n)
                        for n in lInfo._lTree.nodes if n != lInfo._rootId]
        sorted_plist = sorted(sorted_plist, reverse=True,
                              key=lambda tup: tup[0])
        return sorted_plist

    def get_loop_ranges(sFlow, sorted_plist):
        """
        Get the source ranges of loops

        Return
            Dict keyed by loop nodes in source flow with values consisting of
            tuples (l_min, l_max), where l_min and l_max are dictionaries of
            the form {'l': line, 'c': column, 'd': discriminator}.

        Note
            An AssertionError is raised if a source loop is contained in a
            single line (improper formatting).
        """
        assert isinstance(sFlow, fparser.control_flow.SourceControlFlow)
        lInfo = sFlow.get_loop_info()
        minmax = {}
        for _, lh in sorted_plist:
            bn = lInfo.get_body_nodes(lh)
            if bn is None:
                bn = {lh}
            else:
                bn = bn.union([lh])
            for n in bn:
                l_info = sFlow.get_line_info(n)
                # Tuple (min, max) of dict lcd ('l': line, 'c': column,
                # 'd': discriminator), where d is always 0.
                l_min = l_info['min']
                l_max = l_info['max']
                minmax[n] = (l_min, l_max)
                if sFlow.is_virtual_node(n):
                    assert n != lh, "Header node in source loop is virtual."
                    continue
                assert l_min != l_max, \
                    "Invalid line info for source node " \
                    "n={}, min {}, max {}.".format(n, l_min, l_max)
        return minmax

    def get_loop_tree(sFlow, lines_minmax, sorted_plist):
        """Returns a loop tree where each node contains a line range 'r' as
        attr."""

        def get_loop_min_max(lInfo, lh):
            # Returns min, max line found in loop body nodes given loop
            # header lh.
            mm = lines_minmax[lh]
            line_min = mm[0]['l']
            line_max = mm[0]['l']
            bn = lInfo.get_body_nodes(lh)
            if bn is None:
                return line_min, line_max
            for n in bn:
                if sFlow.is_virtual_node(n):
                    continue
                l_min, l_max = lines_minmax[n]
                if l_max['l'] > line_max:
                    line_max = l_max['l']
                if l_min['l'] < line_min:
                    line_min = l_min['l']
            return line_min, line_max

        lInfo = sFlow.get_loop_info()
        rTree = nx.DiGraph()
        rTree.add_nodes_from(lInfo._lTree.nodes)
        rTree.add_edges_from(lInfo._lTree.edges)
        rTree.graph['root'] = lInfo._rootId
        for n in rTree.nodes:
            if n == rTree.graph['root']:
                continue
            line_min, line_max = get_loop_min_max(lInfo, n)
            # annotate some info
            rTree.nodes[n].update(lInfo._lTree.nodes[n])
            rTree.nodes[n]['line_min'] = int(line_min)
            rTree.nodes[n]['line_max'] = int(line_max)
            # --
            if len(list(rTree.successors(n))) == 0:
                if line_min == line_max:
                    log.warning("Loop {} contained in a single line only."
                                .format(n))
                continue
            assert line_min != line_max, \
                "Source loop in single line, min {}, max {}".format(
                    line_min, line_max)
        # Fix max number in outer loops
        for _, n in s_sorted_plist:
            pre = list(rTree.predecessors(n))
            # Skip outermost loops
            if pre == [rTree.graph['root']]:
                continue
            assert len(pre) == 1
            p = pre[0]
            n_line_max = rTree.nodes[n]['line_max']
            p_line_max = rTree.nodes[p]['line_max']
            if n_line_max > p_line_max:
                rTree.nodes[p]['line_max'] = n_line_max
        # Sort the outermost loops, save the sorted list of outermost loop
        # ids.
        ol = [(n, rTree.nodes[n]['line_min'])
              for n in rTree.successors(rTree.graph['root'])]
        ol = sorted(ol, key=lambda tup: tup[1])
        rTree.graph['ol_sorted'] = [n for n, _ in ol]
        # --
        return rTree

    def get_source_loop(rTree, line):
        """
        Returns the loop tree node the given line corresponds to.

        Note
            - Source loops are assumed to be properly formatted, i.e., not
              contained in a single line, so they can be properly
              distinguished.
            - For a given line that falls out of the outermost loop's scope,
              i.e., it isn't part of an SCC, the nearest loop is returned. If
              the given line is past the last outermost loop scope, then an
              error is raised.

        Args
            rTree : Source loop tree with annotated line ranges.
            line : Source file line.

        Return
            Node id in source loop tree.
        """

        def visit_node(n, line):
            l_min = rTree.nodes[n]['line_min']
            l_max = rTree.nodes[n]['line_max']
            if l_min <= line <= l_max:
                # log.debug("Visiting node n={}, line in range min,max={},{}"
                #           .format(n, l_min, l_max))
                if len(list(rTree.successors(n))) == 0:
                    return n
                else:
                    for s in rTree.successors(n):
                        # log.debug("Recursive with s={}".format(s))
                        res = visit_node(s, line)
                        if res is not None:
                            return res
                    return n
            else:
                # log.debug("Visiting node n={}, line not in range "
                #           "min,max={},{}.".format(n, l_min, l_max))
                return None

        # 1. Get root children
        ol_sorted = rTree.graph['ol_sorted']
        # 2. Iterate over all root children
        for n in ol_sorted:
            l_min = rTree.nodes[n]['line_min']
            # log.debug("Searching line {}, in loop node n={}, l_min={}"
            #           .format(line, n, l_min))
            if line < l_min:
                log.warning("Found line out of loop scope. line={}, "
                            "l_min={}".format(line, l_min))
                # return n
                return None
            res = visit_node(n, line)
            # log.debug("Result from visiting node is: {}".format(res))
            if res is not None:
                return res
            else:
                # Search other loops
                continue
        # If we land here, then something went wrong: line past last loop,
        # or most likely a loop introduced by the compiler that is not
        # contained in any SCC of the flow under analysis.
        return None

    # Get loop info
    blInfo = bFlow.get_loop_info()
    slInfo = sFlow.get_loop_info()
    dwUnqMap, dwLinesAll = find_unq_dw(bFlow)
    s_sorted_plist = get_sorted_plist(sFlow)
    b_sorted_plist_r = reversed(get_sorted_plist(bFlow))
    s_lines_minmax = get_loop_ranges(sFlow, s_sorted_plist)
    s_rTree = get_loop_tree(sFlow, s_lines_minmax, s_sorted_plist)

    # export loop tree
    if do_render:
        render.render_graph(s_rTree, dir=tempfile.tempdir,
                            name=sFlow.name + '_looptree', prefix="src",
                            topnode=s_rTree.graph['root'],
                            attrs=['line_min', 'line_max', 'body',
                                   'backPreds'])

    # Map dwLines to source loops
    map_loop_binary = {}
    map_loop_source = {k: set() for _, k in s_sorted_plist}
    for _, b_n in b_sorted_plist_r:
        # Get unique dw keys for this block
        dwLines = dwUnqMap[b_n]
        parent_node = blInfo.get_parent_node(b_n)
        log.debug("")
        log.debug("Matching dw line info for binary loop header node {}:"
                  .format(b_n))
        # Check if empty; note that this may happen if a loop is copied
        # multiple times in binary code, and sibling loops in the binary loop
        # tree are processed "arbitrarily". TODO: Handle this case.
        assert len(dwLines) != 0, "No info for this loop"
        min_dwl = None
        max_dwl = None
        min_sn = None
        max_sn = None
        for l in dwLines:
            dwLine = dwLinesAll[l]
            s_n = get_source_loop(s_rTree, dwLine['LineNumber'])
            if parent_node != blInfo._rootId:
                assert parent_node in map_loop_binary
                # TODO: New loops correspond to single dwLines, add extra
                # check for this case elsewhere.
                if s_n == map_loop_binary[parent_node] and len(dwLines) > 1:
                    log.warning("Ignoring dwLine({}).".format(l))
                    continue
            if s_n is None:
                log.info("Ignoring dwLine({}), not contained in any source "
                         "loop.".format(l))
                continue
            if min_dwl is None:
                min_dwl = dwLine['LineNumber']
                min_sn = s_n
            if max_dwl is None:
                max_dwl = dwLine['LineNumber']
                max_sn = s_n
            if dwLine['LineNumber'] < min_dwl:
                min_dwl = dwLine['LineNumber']
                min_sn = s_n
            if dwLine['LineNumber'] > max_dwl:
                max_dwl = dwLine['LineNumber']
                max_sn = s_n
            log.debug("dwLine({}) is matched to source loop {}".format(l, s_n))
        if extLoopInfo is not None:
            if str(b_n) in extLoopInfo["loops"]:
                if "skip" in extLoopInfo["loops"][str(b_n)]:
                    if extLoopInfo["loops"][str(b_n)]["skip"] == "True":
                        log.info("Not matching binary loop {}, ".format(b_n) +
                                 "skip flag set in external loop info file.")
                        map_loop_binary[b_n] = None
                        continue
        if min_sn is None or max_sn is None:
            log.debug("min_dwl={}, max_dwl={}".format(min_dwl, max_dwl))
            log.debug("min_sn={}, max_sn={}".format(min_sn, max_sn))
            log.warning("Could not match binary loop {}.".format(b_n))
            map_loop_binary[b_n] = None
            continue
        p_min = slInfo.get_preorder_number(min_sn)
        p_max = slInfo.get_preorder_number(max_sn)
        if p_min < p_max:
            matched_loop = min_sn
        else:
            matched_loop = max_sn
        log.debug("Matched to source loop {}.".format(matched_loop))
        map_loop_binary[b_n] = matched_loop
        map_loop_source[matched_loop].add(b_n)

    return map_loop_binary, map_loop_source