def optguards(self, loop, user_code=False):
    info = FakeLoopInfo(loop)
    info.snapshot(loop)
    for op in loop.operations:
        if op.is_guard():
            op.setdescr(compile.CompileLoopVersionDescr())
    dep = DependencyGraph(loop)
    opt = GuardStrengthenOpt(dep.index_vars)
    opt.propagate_all_forward(info, loop, user_code)
    return opt
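
# Conceptual sketch (not the actual GuardStrengthenOpt algorithm): the idea of
# guard strengthening on index variables is to keep only the strongest bound
# check and drop guards it already implies.  The toy model below is hypothetical;
# a guard is represented as (offset, bound_name), meaning `i + offset <= bound`.
def strengthen_guards(guards):
    # keep, per bound, only the guard with the largest offset (the strongest check)
    strongest = {}
    for off, bound in guards:
        if bound not in strongest or off > strongest[bound]:
            strongest[bound] = off
    return [(off, bound) for bound, off in strongest.items()]

# the check `i + 8 <= n` subsumes `i + 4 <= n`
assert strengthen_guards([(4, 'n'), (8, 'n')]) == [(8, 'n')]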
def optimize_vector(trace, metainterp_sd, jitdriver_sd, warmstate,
                    loop_info, loop_ops, jitcell_token=None):
    """ Enter the world of SIMD. Bails if it cannot transform the trace. """
    user_code = not jitdriver_sd.vec and warmstate.vec_all
    e = len(loop_ops) - 1
    assert e > 0
    assert rop.is_final(loop_ops[e].getopnum())
    loop = VectorLoop(loop_info.label_op, loop_ops[:e], loop_ops[-1])
    if user_code and user_loop_bail_fast_path(loop, warmstate):
        return loop_info, loop_ops
    # the original loop (output of optimize_unroll)
    info = LoopVersionInfo(loop_info)
    version = info.snapshot(loop)
    loop.setup_vectorization()
    try:
        debug_start("vec-opt-loop")
        metainterp_sd.logger_noopt.log_loop([], loop.finaloplist(label=True),
                                            -2, None, None, "pre vectorize")
        metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
        #
        start = time.clock()
        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, warmstate.vec_cost)
        index_vars = opt.run_optimization(info, loop)
        gso = GuardStrengthenOpt(index_vars)
        gso.propagate_all_forward(info, loop, user_code)
        end = time.clock()
        #
        metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
        metainterp_sd.logger_noopt.log_loop([], loop.finaloplist(label=True),
                                            -2, None, None, "post vectorize")
        nano = int((end - start) * 10.0**9)
        debug_print("# vecopt factor: %d opcount: (%d -> %d) took %dns" % \
                    (opt.unroll_count + 1, len(version.loop.operations),
                     len(loop.operations), nano))
        debug_stop("vec-opt-loop")
        #
        info.label_op = loop.label
        return info, loop.finaloplist(jitcell_token=jitcell_token,
                                      reset_label_token=False)
    except NotAVectorizeableLoop:
        debug_stop("vec-opt-loop")
        # vectorization is not possible
        return loop_info, version.loop.finaloplist()
    except NotAProfitableLoop:
        debug_stop("vec-opt-loop")
        # cost model says to skip this loop
        return loop_info, version.loop.finaloplist()
    except Exception as e:
        debug_stop("vec-opt-loop")
        debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!")
        if we_are_translated():
            from rpython.rtyper.lltypesystem import lltype
            from rpython.rtyper.lltypesystem.lloperation import llop
            llop.debug_print_traceback(lltype.Void)
        else:
            raise
    finally:
        loop.teardown_vectorization()
    return loop_info, loop_ops
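
# Illustrative pattern only (hypothetical names, not RPython API): optimize_vector
# mutates `loop` in place, so it snapshots the trace first and hands the snapshot
# back whenever the transformation bails.  A minimal sketch of that contract:
import copy

class TransformBailed(Exception):
    pass

def optimize_or_fallback(ops, transform):
    snapshot = copy.deepcopy(ops)   # plays the role of info.snapshot(loop)
    try:
        transform(ops)              # may raise when not vectorizable or not profitable
        return ops
    except TransformBailed:
        return snapshot             # plays the role of version.loop.finaloplist()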
def run_optimization(self, metainterp_sd, info, loop, jitcell_token, user_code):
    self.orig_label_args = loop.label.getarglist_copy()
    self.linear_find_smallest_type(loop)
    byte_count = self.smallest_type_bytes
    vsize = self.vector_ext.vec_size()
    # stop, there is no chance to vectorize this trace
    # we cannot optimize normal traces (if there is no label)
    if vsize == 0:
        debug_print("vector size is zero")
        raise NotAVectorizeableLoop
    if byte_count == 0:
        debug_print("could not find smallest type")
        raise NotAVectorizeableLoop
    if loop.label.getopnum() != rop.LABEL:
        debug_print("not a loop, can only vectorize loops")
        raise NotAVectorizeableLoop
    # find index guards and move to the earliest position
    graph = self.analyse_index_calculations(loop)
    if graph is not None:
        state = SchedulerState(metainterp_sd.cpu, graph)
        self.schedule(state)  # reorder the trace

    # unroll
    self.unroll_count = self.get_unroll_count(vsize)
    align_unroll = self.unroll_count == 1 and \
                   self.vector_ext.should_align_unroll
    self.unroll_loop_iterations(loop, self.unroll_count,
                                align_unroll_once=align_unroll)

    # vectorize
    graph = DependencyGraph(loop)
    self.find_adjacent_memory_refs(graph)
    self.extend_packset()
    self.combine_packset()
    costmodel = GenericCostModel(self.cpu, self.cost_threshold)
    state = VecScheduleState(graph, self.packset, self.cpu, costmodel)
    self.schedule(state)
    if not state.profitable():
        raise NotAProfitableLoop

    gso = GuardStrengthenOpt(graph.index_vars)
    gso.propagate_all_forward(info, loop, user_code)

    # re-schedule the trace -> removes many pure operations
    graph = DependencyGraph(loop)
    state = SchedulerState(self.cpu, graph)
    state.schedule()

    info.extra_before_label = loop.align_operations
    for op in loop.align_operations:
        op.set_forwarded(None)

    return loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
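
# Illustrative arithmetic (assumption: the unroll factor follows the usual
# "vector register bytes / smallest element size" rule that get_unroll_count is
# based on; estimate_unroll_count below is a hypothetical stand-in, not the
# RPython implementation).
def estimate_unroll_count(vec_reg_bytes, smallest_type_bytes):
    # e.g. 16-byte SSE registers and 4-byte floats give 4 lanes; the trace is
    # already "unrolled once", hence the -1 for the number of further unrolls.
    if smallest_type_bytes == 0:
        return 0
    return vec_reg_bytes // smallest_type_bytes - 1

assert estimate_unroll_count(16, 4) == 3   # unroll 3 more times -> 4 iterations total
assert estimate_unroll_count(16, 8) == 1   # 2 doubles per 128-bit register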