def __init__(self,
             unroll_factor=4,
             max_static_unrolling=8,
             max_block_size=50):
    """
    Record the limits that control loop unrolling.

    unroll_factor        -- stored on self.unroll_factor
    max_static_unrolling -- stored on self.max_static_unrolling; when it is
                            None, unroll_factor is stored there instead
    max_block_size       -- stored on self.max_block_size
    """
    LoopTransform.__init__(self)
    self.unroll_factor = unroll_factor

    # should we unroll static loops more than ones with unknown iters?
    static_limit = max_static_unrolling
    if static_limit is None:
        static_limit = unroll_factor
    self.max_static_unrolling = static_limit

    self.max_block_size = max_block_size
def transform_block(self, stmts):
    """
    Transform a block, then reuse repeated array loads within it.

    The block is first handed to LoopTransform.transform_block. If the
    result is not a simple block (branches disallowed) it is returned
    as-is. Otherwise every assignment whose right-hand side is an Index
    into a Var naming a "safe" array is considered: the first load of a
    given (array name, index) pair is remembered, and later loads of the
    same pair have their right-hand side replaced by the remembered value.

    An array is "safe" when neither it nor any of its may-aliases is
    written anywhere in the block.

    Returns the (possibly longer) list of statements; Assign statements
    are modified in place.
    """
    stmts = LoopTransform.transform_block(self, stmts)
    if not self.is_simple_block(stmts, allow_branches = False):
        return stmts

    reads, writes = self.collect_memory_accesses(stmts)

    safe_arrays = set([])
    for name in reads:
        # if any alias of this array gets written to, consider it unsafe.
        # Work on a copy: dict.get hands back the stored set itself, and
        # adding to it would alter the alias information held on self.
        aliases = set(self.may_alias.get(name, ()))
        aliases.add(name)
        if not any(alias in writes for alias in aliases):
            safe_arrays.add(name)

    # maps (array name, index expression) -> value already holding the load
    available_expressions = {}
    new_stmts = []
    for stmt in stmts:
        if stmt.__class__ is Assign:
            rhs = stmt.rhs
            if rhs.__class__ is Index and \
               rhs.value.__class__ is Var and \
               rhs.value.name in safe_arrays:
                key = (rhs.value.name, rhs.index)
                if key in available_expressions:
                    # already loaded earlier in this block
                    stmt.rhs = available_expressions[key]
                elif stmt.lhs.__class__ is Var:
                    # the variable being assigned can stand in for the load
                    available_expressions[key] = stmt.lhs
                else:
                    # left-hand side is not a plain variable: load into a
                    # fresh temporary first so the value can be reused
                    temp = self.fresh_var(rhs.type, "load")
                    new_stmts.append(Assign(temp, rhs))
                    stmt.rhs = temp
                    available_expressions[key] = temp
        new_stmts.append(stmt)
    return new_stmts
def transform_block(self, stmts):
    """
    Transform a block, then reuse repeated array loads within it.

    The block is first handed to LoopTransform.transform_block. If the
    result is not a simple block (branches disallowed) it is returned
    as-is. Otherwise every assignment whose right-hand side is an Index
    into a Var naming a "safe" array is considered: the first load of a
    given (array name, index) pair is remembered, and later loads of the
    same pair have their right-hand side replaced by the remembered value.

    An array is "safe" when neither it nor any of its may-aliases is
    written anywhere in the block.

    Returns the (possibly longer) list of statements; Assign statements
    are modified in place.
    """
    stmts = LoopTransform.transform_block(self, stmts)
    if not self.is_simple_block(stmts, allow_branches=False):
        return stmts

    reads, writes = self.collect_memory_accesses(stmts)

    safe_arrays = set([])
    for name in reads:
        # if any alias of this array gets written to, consider it unsafe.
        # Work on a copy: dict.get hands back the stored set itself, and
        # adding to it would alter the alias information held on self.
        aliases = set(self.may_alias.get(name, ()))
        aliases.add(name)
        if not any(alias in writes for alias in aliases):
            safe_arrays.add(name)

    # maps (array name, index expression) -> value already holding the load
    available_expressions = {}
    new_stmts = []
    for stmt in stmts:
        if stmt.__class__ is Assign:
            rhs = stmt.rhs
            if rhs.__class__ is Index and \
               rhs.value.__class__ is Var and \
               rhs.value.name in safe_arrays:
                key = (rhs.value.name, rhs.index)
                if key in available_expressions:
                    # already loaded earlier in this block
                    stmt.rhs = available_expressions[key]
                elif stmt.lhs.__class__ is Var:
                    # the variable being assigned can stand in for the load
                    available_expressions[key] = stmt.lhs
                else:
                    # left-hand side is not a plain variable: load into a
                    # fresh temporary first so the value can be reused
                    temp = self.fresh_var(rhs.type, "load")
                    new_stmts.append(Assign(temp, rhs))
                    stmt.rhs = temp
                    available_expressions[key] = temp
        new_stmts.append(stmt)
    return new_stmts
def transform_ForLoop(self, stmt):
    """
    Unroll the body of a simple for-loop.

    The loop is returned untouched when unroll_factor is 1, when (after
    LoopTransform.transform_ForLoop) its body is not a simple block or is
    longer than max_block_size, or when its bounds are all constants and
    the computed iteration count is not positive.

    Otherwise the body is copied unroll_factor times (or, for constant
    bounds with at most max_static_unrolling iterations, once per
    iteration) and one of three things is emitted into self.blocks:

      * the copied body inlined directly, when a single pass of it covers
        the whole constant range;
      * an unrolled loop that covers the range exactly, followed by
        assignments of the original loop-carried names;
      * an unrolled loop, with the original loop rewritten to start where
        the unrolled loop stops so that it handles the leftover iterations.

    Returns the rewritten original loop in the last case and None in the
    first two (everything needed has already been emitted).
    """
    assert self.unroll_factor > 0
    if self.unroll_factor == 1:
        return stmt

    if stmt.step.__class__ is Const:
        assert stmt.step.value > 0, "Downward loops not yet supported"

    stmt = LoopTransform.transform_ForLoop(self, stmt)

    if not self.is_simple_block(stmt.body) or \
       len(stmt.body) > self.max_block_size:
        return stmt

    start, stop, step = stmt.start, stmt.stop, stmt.step

    # if loop has static bounds, fully unroll unless it's too big
    unroll_factor = self.unroll_factor

    # the number of loop iterations is not generally known
    if start.__class__ is Const and \
       stop.__class__ is Const and \
       step.__class__ is Const:
        niters = safediv(stop.value - start.value, step.value)
        if niters <= 0:
            # A non-positive count cannot serve as an unroll factor: no
            # copy of the body would be made and the unrolled step would
            # be zero or negative. Leave the loop alone.
            return stmt
        if niters <= self.max_static_unrolling:
            unroll_factor = niters

    # push the unrolled body onto the stack
    self.blocks.push()
    phi_values = None
    loop_var = self.fresh_var(stmt.var.type, "loop_counter")
    # names used for the loop-carried values by the first body copy
    name_mappings = None
    for iter_num in xrange(unroll_factor):
        phi_values, curr_names = \
            self.copy_loop_body(stmt, loop_var, iter_num, phi_values)
        if name_mappings is None:
            name_mappings = curr_names
    unrolled_body = self.blocks.pop()

    unroll_value = syntax_helpers.const_int(unroll_factor, stmt.var.type)
    unrolled_step = self.mul(unroll_value, stmt.step)
    # NOTE(review): if self.div rounds toward negative infinity, a
    # non-constant range with stop < start gives a negative trunc and the
    # cleanup loop would start before `start` -- confirm and clamp if so.
    trunc = self.mul(self.div(self.sub(stop, start), unrolled_step),
                     unrolled_step)
    unrolled_stop = self.add(stmt.start, trunc)

    final_merge = {}
    for (old_name, (input_value, _)) in stmt.merge.iteritems():
        first_name_in_loop = name_mappings[old_name].name
        output_value = phi_values[old_name]
        final_merge[first_name_in_loop] = (input_value, output_value)

    unrolled_loop = ForLoop(var = loop_var,
                            start = stmt.start,
                            stop = unrolled_stop,
                            step = unrolled_step,
                            body = unrolled_body,
                            merge = final_merge)

    if unrolled_loop.start.__class__ is Const and \
       unrolled_loop.stop.__class__ is Const and \
       unrolled_loop.step.__class__ is Const:
        start_value = unrolled_loop.start.value
        stop_value = unrolled_stop.value
        step_value = unrolled_loop.step.value
        # Inline only when the single pass also reaches the original stop;
        # otherwise the leftover iterations still need the cleanup loop.
        if start_value + step_value == stop_value and \
           stop.__class__ is Const and \
           stop.value == stop_value:
            self.assign(unrolled_loop.var, unrolled_loop.start)
            # assign all loop-carried variables to their initial values
            if len(final_merge) > 0:
                self.comment("Initialize loop-carried values")
            for (acc_name, (input_value, _)) in final_merge.iteritems():
                var = Var(acc_name, type = input_value.type)
                self.assign(var, input_value)
            # inline loop body
            self.blocks.top().extend(unrolled_body)
            # since we're not going to have a cleanup loop,
            # need to assign all the original phi-carried variables
            if len(stmt.merge) > 0:
                self.comment("Finalize loop-carried values")
            for old_acc_name in stmt.merge.iterkeys():
                last_value = phi_values[old_acc_name]
                var = Var(old_acc_name, last_value.type)
                self.assign(var, last_value)
            return None

    self.blocks.append(unrolled_loop)

    if unrolled_loop.stop.__class__ is not Const or \
       stop.__class__ is not Const or \
       unrolled_loop.stop.value != stop.value:
        # the original loop becomes the cleanup loop, picking up the
        # loop-carried values where the unrolled loop left them
        cleanup_merge = {}
        for (old_name, (_, output_value)) in stmt.merge.iteritems():
            input_var = name_mappings[old_name]
            cleanup_merge[old_name] = (input_var, output_value)
        stmt.merge = cleanup_merge
        stmt.start = unrolled_loop.stop
        return stmt

    # The unrolled loop covers the range exactly, so there is no cleanup
    # loop. Give the original loop-carried names the values a zero-trip
    # cleanup loop would have given them.
    if len(stmt.merge) > 0:
        self.comment("Finalize loop-carried values")
    for (old_name, (input_value, _)) in stmt.merge.iteritems():
        var = Var(old_name, type = input_value.type)
        self.assign(var, name_mappings[old_name])
    return None
def transform_ForLoop(self, stmt):
    """
    Unroll the body of a simple for-loop.

    The loop is returned untouched when unroll_factor is 1, when (after
    LoopTransform.transform_ForLoop) its body is not a simple block or is
    longer than max_block_size, or when its bounds are all constants and
    the computed iteration count is not positive.

    Otherwise the body is copied unroll_factor times (or, for constant
    bounds with at most max_static_unrolling iterations, once per
    iteration) and one of three things is emitted into self.blocks:

      * the copied body inlined directly, when a single pass of it covers
        the whole constant range;
      * an unrolled loop that covers the range exactly, followed by
        assignments of the original loop-carried names;
      * an unrolled loop, with the original loop rewritten to start where
        the unrolled loop stops so that it handles the leftover iterations.

    Returns the rewritten original loop in the last case and None in the
    first two (everything needed has already been emitted).
    """
    assert self.unroll_factor > 0
    if self.unroll_factor == 1:
        return stmt

    if stmt.step.__class__ is Const:
        assert stmt.step.value > 0, "Downward loops not yet supported"

    stmt = LoopTransform.transform_ForLoop(self, stmt)

    if not self.is_simple_block(stmt.body) or \
       len(stmt.body) > self.max_block_size:
        return stmt

    start, stop, step = stmt.start, stmt.stop, stmt.step

    # if loop has static bounds, fully unroll unless it's too big
    unroll_factor = self.unroll_factor

    # the number of loop iterations is not generally known
    if start.__class__ is Const and \
       stop.__class__ is Const and \
       step.__class__ is Const:
        niters = safediv(stop.value - start.value, step.value)
        if niters <= 0:
            # A non-positive count cannot serve as an unroll factor: no
            # copy of the body would be made and the unrolled step would
            # be zero or negative. Leave the loop alone.
            return stmt
        if niters <= self.max_static_unrolling:
            unroll_factor = niters

    # push the unrolled body onto the stack
    self.blocks.push()
    phi_values = None
    loop_var = self.fresh_var(stmt.var.type, "loop_counter")
    # names used for the loop-carried values by the first body copy
    name_mappings = None
    for iter_num in xrange(unroll_factor):
        phi_values, curr_names = \
            self.copy_loop_body(stmt, loop_var, iter_num, phi_values)
        if name_mappings is None:
            name_mappings = curr_names
    unrolled_body = self.blocks.pop()

    unroll_value = syntax.helpers.const_int(unroll_factor, stmt.var.type)
    unrolled_step = self.mul(unroll_value, stmt.step, "unrolled_step")
    n_total_iters = self.sub(stop, start, name = "niters")
    n_unrolled_iters = self.div(n_total_iters, unrolled_step,
                                name = "unrolled_iters")
    # Python's division rounds toward negative infinity rather than
    # truncating as C does, so small_negative / big_positive is -1, not 0.
    # Clamp at zero so a range with stop < start gets no unrolled
    # iterations.
    n_unrolled_iters = self.max(n_unrolled_iters, self.int(0),
                                name = "unrolled_iters")
    trunc = self.mul(n_unrolled_iters, unrolled_step, "trunc")
    unrolled_stop = self.add(stmt.start, trunc, "unrolled_stop")

    final_merge = {}
    for (old_name, (input_value, _)) in stmt.merge.iteritems():
        first_name_in_loop = name_mappings[old_name].name
        output_value = phi_values[old_name]
        final_merge[first_name_in_loop] = (input_value, output_value)

    unrolled_loop = ForLoop(var = loop_var,
                            start = stmt.start,
                            stop = unrolled_stop,
                            step = unrolled_step,
                            body = unrolled_body,
                            merge = final_merge)

    if unrolled_loop.start.__class__ is Const and \
       unrolled_loop.stop.__class__ is Const and \
       unrolled_loop.step.__class__ is Const:
        start_value = unrolled_loop.start.value
        stop_value = unrolled_stop.value
        step_value = unrolled_loop.step.value
        # Inline only when the single pass also reaches the original stop;
        # otherwise the leftover iterations still need the cleanup loop.
        if start_value + step_value == stop_value and \
           stop.__class__ is Const and \
           stop.value == stop_value:
            self.assign(unrolled_loop.var, unrolled_loop.start)
            # assign all loop-carried variables to their initial values
            if len(final_merge) > 0:
                self.comment("Initialize loop-carried values")
            for (acc_name, (input_value, _)) in final_merge.iteritems():
                var = Var(acc_name, type = input_value.type)
                self.assign(var, input_value)
            # inline loop body
            self.blocks.top().extend(unrolled_body)
            # since we're not going to have a cleanup loop,
            # need to assign all the original phi-carried variables
            if len(stmt.merge) > 0:
                self.comment("Finalize loop-carried values")
            for old_acc_name in stmt.merge.iterkeys():
                last_value = phi_values[old_acc_name]
                var = Var(old_acc_name, last_value.type)
                self.assign(var, last_value)
            return None

    self.blocks.append(unrolled_loop)

    if unrolled_loop.stop.__class__ is not Const or \
       stop.__class__ is not Const or \
       unrolled_loop.stop.value != stop.value:
        # the original loop becomes the cleanup loop, picking up the
        # loop-carried values where the unrolled loop left them
        cleanup_merge = {}
        for (old_name, (_, output_value)) in stmt.merge.iteritems():
            input_var = name_mappings[old_name]
            cleanup_merge[old_name] = (input_var, output_value)
        stmt.merge = cleanup_merge
        stmt.start = unrolled_loop.stop
        return stmt

    # The unrolled loop covers the range exactly, so there is no cleanup
    # loop. Give the original loop-carried names the values a zero-trip
    # cleanup loop would have given them.
    if len(stmt.merge) > 0:
        self.comment("Finalize loop-carried values")
    for (old_name, (input_value, _)) in stmt.merge.iteritems():
        var = Var(old_name, type = input_value.type)
        self.assign(var, name_mappings[old_name])
    return None