def instrument_inline_candidates(graphs, threshold):
    """Put an 'instrument_count' operation in front of inlinable direct calls.

    Every "direct_call" operation found in the blocks of 'graphs' is
    examined.  The call site is instrumented when the called function
    object has a 'graph', its '_callable' (if any) is not flagged with
    '_dont_inline_', and static_instruction_count() of that graph is at
    most 'threshold'.  Each inserted operation carries the constant tag
    'inline' and a distinct integer label, numbered from 0 in the order in
    which the call sites are encountered.  The total is logged at the end.
    """
    # graph -> "is small enough"; None is pre-seeded as "not a candidate"
    verdicts = {None: False}

    def candidate(graph):
        # memoized size check, so each graph is measured at most once
        if graph not in verdicts:
            verdicts[graph] = static_instruction_count(graph) <= threshold
        return verdicts[graph]

    n = 0
    for parentgraph in graphs:
        for block in parentgraph.iterblocks():
            ops = block.operations
            # Walk from the last operation to the first: inserting at 'pos'
            # only shifts operations that have already been visited.
            for pos in range(len(ops) - 1, -1, -1):
                op = ops[pos]
                if op.opname != "direct_call":
                    continue
                funcobj = op.args[0].value._obj
                graph = getattr(funcobj, 'graph', None)
                if graph is None:
                    continue
                target = getattr(funcobj, '_callable', None)
                if getattr(target, '_dont_inline_', False):
                    continue
                if not candidate(graph):
                    continue
                tag = Constant('inline', Void)
                label = Constant(n, Signed)
                dummy = Variable()
                dummy.concretetype = Void
                counter_op = SpaceOperation('instrument_count',
                                            [tag, label], dummy)
                # the counter goes immediately before the call itself
                ops.insert(pos, counter_op)
                n += 1
    log.inlining("%d call sites instrumented" % n)
def inline_malloc_removal_phase(config, translator, graphs, inline_threshold,
                                inline_heuristic,
                                call_count_pred=None,
                                inline_graph_from_anywhere=False):
    """Run one inlining step followed by one malloc-removal step on 'graphs'.

    The inlining step is skipped when 'inline_threshold' is false (0 or
    None); otherwise it delegates to inline.auto_inline_graphs(), passing
    'inline_heuristic', 'call_count_pred' and 'inline_graph_from_anywhere'
    through unchanged.  The malloc-removal step runs only when
    'config.mallocs' is true and delegates to remove_mallocs().

    After each step that ran, if 'config.print_statistics' is true, a
    header line is printed and print_statistics() is called on
    translator.graphs[0].  Returns None.
    """
    # inline functions in each other
    if inline_threshold:
        log.inlining("phase with threshold factor: %s" % inline_threshold)
        log.inlining("heuristic: %s.%s" % (inline_heuristic.__module__,
                                           inline_heuristic.__name__))

        inline.auto_inline_graphs(
            translator, graphs, inline_threshold,
            heuristic=inline_heuristic,
            call_count_pred=call_count_pred,
            inline_graph_from_anywhere=inline_graph_from_anywhere)

        if config.print_statistics:
            # Parenthesised single-argument form: same output as the
            # Python 2 print statement, and also valid Python 3 syntax.
            print("after inlining:")
            print_statistics(translator.graphs[0], translator)

    # vaporize mallocs
    if config.mallocs:
        log.malloc("starting malloc removal")
        remove_mallocs(translator, graphs)

        if config.print_statistics:
            print("after malloc removal:")
            print_statistics(translator.graphs[0], translator)
def auto_inline_graphs(translator, graphs, threshold, call_count_pred=None,
                       heuristic=inlining_heuristic,
                       inline_graph_from_anywhere=False):
    """Auto-inline the calls made from 'graphs', then tidy those graphs up.

    The static call graph is computed with inlinable_static_callers() and
    handed to auto_inlining() together with 'threshold', 'heuristic' and
    'call_count_pred'.  The value auto_inlining() returns is logged as the
    number of inlined callsites.  Finally
    removenoops.remove_duplicate_casts() is applied to every graph of
    'graphs'.

    When 'inline_graph_from_anywhere' is true, the callees are restricted
    to the graphs of 'translator.graphs' that have no
    'exceptiontransformed' attribute; otherwise no explicit restriction
    (ok_to_call=None) is passed on.
    """
    ok_to_call = None
    if inline_graph_from_anywhere:
        # any graph may be the target of an inlined call, except the
        # ones that were already exception-transformed
        ok_to_call = set()
        for known_graph in translator.graphs:
            if not hasattr(known_graph, 'exceptiontransformed'):
                ok_to_call.add(known_graph)

    callgraph = inlinable_static_callers(graphs, ok_to_call=ok_to_call)
    count = auto_inlining(translator, threshold,
                          callgraph=callgraph,
                          heuristic=heuristic,
                          call_count_pred=call_count_pred)
    log.inlining('inlined %d callsites.' % (count,))

    for graph in graphs:
        removenoops.remove_duplicate_casts(graph, translator)
def auto_inlining(translator, threshold=None,
                  callgraph=None,
                  call_count_pred=None,
                  heuristic=inlining_heuristic):
    """Inline callee graphs into their callers, lowest weight first.

    'callgraph' is an iterable of (caller_graph, callee_graph) pairs; when
    it is None it is computed from translator.graphs with
    inlinable_static_callers().  Every callee sits in a priority queue
    keyed by (weight, -number_of_callers).  Weights are computed lazily:
    0.0 for graphs accepted by always_inline(), otherwise whatever
    'heuristic(graph)' returns.  Callees are popped and inlined into each
    of their callers with inline_function() until the smallest up-to-date
    weight reaches 'threshold'.

    Returns the accumulated total of the values returned by
    inline_function().  Raises CannotInline if a graph for which
    always_inline(graph) is True still has a failure recorded at the end.
    Every caller graph that inline_function() was applied to without a
    CannotInline is passed to cleanup_graph() before returning.
    """
    assert threshold is not None and threshold != 1
    import heapq

    to_cleanup = {}
    callers = {}     # {graph: {graphs-that-call-it}}
    callees = {}     # {graph: {graphs-that-it-calls}}
    if callgraph is None:
        callgraph = inlinable_static_callers(translator.graphs)
    for caller, callee in callgraph:
        callers.setdefault(callee, {})[caller] = True
        callees.setdefault(caller, {})[callee] = True

    # the -len(callers) change is OK
    heap = [(0.0, -len(parents), graph)
            for graph, parents in callers.items()]
    valid_weight = {}    # graph -> True once its heap weight is up to date
    try_again = {}       # graph -> reason why it may need another attempt
    lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping()
    raise_analyzer = RaiseAnalyzer(translator)
    count = 0

    while heap:
        weight, _, graph = heap[0]

        if not valid_weight.get(graph):
            # The entry at the top still carries a placeholder weight:
            # compute the real one and put it back into the heap.
            if always_inline(graph):
                weight, fixed = 0.0, True
            else:
                weight, fixed = heuristic(graph)
            # Don't let 'weight' be NaN past this point.  If we do,
            # then heapify() might (sometimes, rarely) not do its job
            # correctly, presumably because both 'a < b' and 'b < a'
            # are false for a NaN.  A concrete example:
            # [39.0, 0.0, 33.0, nan, nan] heapifies to
            # [33.0, nan, 39.0, nan, 0.0], but 33.0 is not the
            # smallest item.
            if not (weight < 1e9):
                weight = 1e9
            #print '  + cost %7.2f %50s' % (weight, graph.name)
            heapq.heapreplace(heap, (weight, -len(callers[graph]), graph))
            valid_weight[graph] = True
            if not fixed:
                try_again[graph] = 'initial'
            continue

        if weight >= threshold:
            # finished... unless some graphs not in valid_weight would now
            # have a weight below the threshold.  Re-insert such graphs
            # at the start of the heap
            found_stale = False
            for idx, entry in enumerate(heap):
                stale_graph = entry[2]
                if not valid_weight.get(stale_graph):
                    heap[idx] = (0.0, entry[1], stale_graph)
                    found_stale = True
            if not found_stale:
                break
            heapq.heapify(heap)
            continue

        heapq.heappop(heap)
        if callers[graph]:
            if translator.config.translation.verbose:
                log.inlining('%7.2f %50s' % (weight, graph.name))
            else:
                log.dot()

        for parentgraph in callers[graph]:
            if parentgraph == graph:
                continue
            try:
                subcount = inline_function(translator, graph, parentgraph,
                                           lltype_to_classdef, raise_analyzer,
                                           call_count_pred, cleanup=False)
            except CannotInline as e:
                # remember the reason; the graph may get another chance
                try_again[graph] = str(e)
                continue
            to_cleanup[parentgraph] = True
            if not subcount:
                continue

            count += subcount
            # the parentgraph should now contain all calls that were
            # done by 'graph'
            for graph2 in callees.get(graph, {}):
                callees[parentgraph][graph2] = True
                callers[graph2][parentgraph] = True
            if parentgraph in try_again:
                # the parentgraph was previously uninlinable, but it has
                # been modified.  Maybe now we can inline it into further
                # parents?
                del try_again[parentgraph]
                heapq.heappush(heap, (0.0, -len(callers[parentgraph]),
                                      parentgraph))
                valid_weight[parentgraph] = False

    # A graph that must always be inlined but still has a recorded
    # failure is an error.
    invalid = []
    for graph, msg in try_again.items():
        if always_inline(graph) is True:
            invalid.append((graph, msg))
    if invalid:
        lines = []
        for failed_graph, msg in invalid:
            lines.append(
                "%s has _always_inline_=True but inlining failed:\n\t%s" %
                (failed_graph, msg))
        raise CannotInline('\n'.join(lines))

    for graph in to_cleanup:
        cleanup_graph(graph)
    return count
def auto_inlining(translator, threshold=None, callgraph=None, call_count_pred=None, heuristic=inlining_heuristic): assert threshold is not None and threshold != 1 to_cleanup = {} from heapq import heappush, heappop, heapreplace, heapify callers = {} # {graph: {graphs-that-call-it}} callees = {} # {graph: {graphs-that-it-calls}} if callgraph is None: callgraph = inlinable_static_callers(translator.graphs) for graph1, graph2 in callgraph: callers.setdefault(graph2, {})[graph1] = True callees.setdefault(graph1, {})[graph2] = True # the -len(callers) change is OK heap = [(0.0, -len(callers[graph]), graph) for graph in callers] valid_weight = {} try_again = {} lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping() raise_analyzer = RaiseAnalyzer(translator) count = 0 while heap: weight, _, graph = heap[0] if not valid_weight.get(graph): if always_inline(graph): weight, fixed = 0.0, True else: weight, fixed = heuristic(graph) #print ' + cost %7.2f %50s' % (weight, graph.name) heapreplace(heap, (weight, -len(callers[graph]), graph)) valid_weight[graph] = True if not fixed: try_again[graph] = 'initial' continue if weight >= threshold: # finished... unless some graphs not in valid_weight would now # have a weight below the threshold. 
Re-insert such graphs # at the start of the heap finished = True for i in range(len(heap)): graph = heap[i][2] if not valid_weight.get(graph): heap[i] = (0.0, heap[i][1], graph) finished = False if finished: break else: heapify(heap) continue heappop(heap) if callers[graph]: if translator.config.translation.verbose: log.inlining('%7.2f %50s' % (weight, graph.name)) else: log.dot() for parentgraph in callers[graph]: if parentgraph == graph: continue subcount = 0 try: subcount = inline_function(translator, graph, parentgraph, lltype_to_classdef, raise_analyzer, call_count_pred, cleanup=False) to_cleanup[parentgraph] = True res = bool(subcount) except CannotInline, e: try_again[graph] = str(e) res = CannotInline if res is True: count += subcount # the parentgraph should now contain all calls that were # done by 'graph' for graph2 in callees.get(graph, {}): callees[parentgraph][graph2] = True callers[graph2][parentgraph] = True if parentgraph in try_again: # the parentgraph was previously uninlinable, but it has # been modified. Maybe now we can inline it into further # parents? del try_again[parentgraph] heappush(heap, (0.0, -len(callers[parentgraph]), parentgraph)) valid_weight[parentgraph] = False