def run_pass(self, state):
    """Strip phi nodes from the IR and refresh the derived metadata.

    After removing phis the variable definition map and the
    post-processor metadata are stale, so both are rebuilt.  If the
    function is a generator that has already been typed, its
    ``types.Generator`` return type is reconstructed because the state
    variables (and hence their types) may have changed.

    Always returns True (the IR is mutated in place).
    """
    func_ir = self._strip_phi_nodes(state.func_ir)
    state.func_ir = func_ir
    func_ir._definitions = build_definitions(func_ir.blocks)
    # Rerun the postprocessor so block/variable metadata matches the
    # phi-free IR; dels are not emitted at this point.
    postproc.PostProcessor(func_ir).run(emit_dels=False)
    # Ensure we are not in objectmode generator
    gen_info = func_ir.generator_info
    if gen_info is not None and state.typemap is not None:
        # Rebuild generator type
        # TODO: move this into PostProcessor
        old_gen_type = state.return_type
        fresh_state_types = [state.typemap[name]
                             for name in gen_info.state_vars]
        state.return_type = types.Generator(
            gen_func=old_gen_type.gen_func,
            yield_type=old_gen_type.yield_type,
            arg_types=old_gen_type.arg_types,
            state_types=fresh_state_types,
            has_finalizer=old_gen_type.has_finalizer,
        )
    return True
def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                     body_blocks, dispatcher_factory, extra):
    """Inject parallel-chunksize save/set/restore calls around a with-region.

    On entry to the region, emits IR equivalent to
    ``$save_pc = numba.set_parallel_chunksize(<arg>)``; on exit, emits a
    second call that passes ``$save_pc`` back, restoring the previous
    chunksize.  ``extra["args"]`` must hold exactly one value: the
    requested chunksize.  Mutates ``blocks``/``func_ir`` in place.
    """
    ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
    assert extra is not None
    args = extra["args"]
    assert len(args) == 1
    chunksize_val = args[0]
    entry_block = blocks[blk_start]
    scope = entry_block.scope
    loc = entry_block.loc
    # A raw ir.Arg must be wrapped as a Var before it can be used as an
    # assignment source.
    if isinstance(chunksize_val, ir.Arg):
        chunksize_val = ir.Var(scope, chunksize_val.name, loc)

    set_state = []      # statements run at region entry
    restore_state = []  # statements run at region exit

    # $ngvar = global(numba) -- reference to the numba module itself
    numba_gv = scope.redefine("$ngvar", loc)
    set_state.append(ir.Assign(ir.Global('numba', numba, loc), numba_gv, loc))
    # $spc = getattr($ngvar, set_parallel_chunksize)
    spc_fn_var = scope.redefine("$spc", loc)
    set_state.append(
        ir.Assign(ir.Expr.getattr(numba_gv, 'set_parallel_chunksize', loc),
                  spc_fn_var, loc))
    # $save_pc = $spc($cs_var) -- sets the new chunksize and remembers
    # the previous one in $save_pc
    saved_pc_var = scope.redefine("$save_pc", loc)
    chunksize_var = scope.redefine("$cs_var", loc)
    set_state.append(ir.Assign(chunksize_val, chunksize_var, loc))
    set_state.append(
        ir.Assign(ir.Expr.call(spc_fn_var, [chunksize_var], (), loc),
                  saved_pc_var, loc))
    # On exit, call $spc($save_pc) to restore the saved chunksize.
    restore_state.append(
        ir.Assign(ir.Expr.call(spc_fn_var, [saved_pc_var], (), loc),
                  saved_pc_var, loc))

    # Splice the set-up code in just before the entry block's terminator
    # (dropping the block's first statement, as the original region
    # marker is no longer needed) and prepend the restore code to the
    # exit block.
    entry_body = entry_block.body
    entry_block.body = entry_body[1:-1] + set_state + [entry_body[-1]]
    blocks[blk_end].body = restore_state + blocks[blk_end].body
    func_ir._definitions = build_definitions(blocks)
    ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
def _replace_stencil_accesses(self, stencil_ir, parfor_vars, in_args,
                              index_offsets, stencil_func, arg_to_arr_dict):
    """ Convert relative indexing in the stencil kernel to standard
        indexing by adding the loop index variables to the corresponding
        dimensions of the array index tuples.

        Mutates ``stencil_ir`` in place and returns a pair
        ``(start_lengths, end_lengths)`` — the per-dimension minimum and
        maximum relative offsets seen (or those taken from the declared
        neighborhood, if one was given).

        Raises ValueError for: standard indexing requested on a name not
        in the kernel, standard indexing on the first argument, stores
        into input arrays, non-constant indices when the neighborhood
        must be inferred, and kernels with no relative accesses at all.
    """
    stencil_blocks = stencil_ir.blocks
    in_arr = in_args[0]
    in_arg_names = [x.name for x in in_args]

    # Resolve the "standard_indexing" option (kernel parameter names) to
    # the caller-side array names recorded in arg_to_arr_dict.
    if "standard_indexing" in stencil_func.options:
        for x in stencil_func.options["standard_indexing"]:
            if x not in arg_to_arr_dict:
                raise ValueError("Standard indexing requested for an array " \
                    "name not present in the stencil kernel definition.")
        standard_indexed = [
            arg_to_arr_dict[x]
            for x in stencil_func.options["standard_indexing"]
        ]
    else:
        standard_indexed = []

    if in_arr.name in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must use " \
            "relative indexing, not standard indexing.")

    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    # replace access indices, find access lengths in each dimension
    need_to_calc_kernel = stencil_func.neighborhood is None
    # If we need to infer the kernel size then initialize the minimum and
    # maximum seen indices for each dimension to 0. If we already have
    # the neighborhood calculated then just convert from neighborhood format
    # to the separate start and end lengths format used here.
    if need_to_calc_kernel:
        start_lengths = ndims * [0]
        end_lengths = ndims * [0]
    else:
        start_lengths = [x[0] for x in stencil_func.neighborhood]
        end_lengths = [x[1] for x in stencil_func.neighborhood]

    # Get all the tuples defined in the stencil blocks.
    tuple_table = ir_utils.get_tuple_table(stencil_blocks)

    found_relative_index = False

    # For all blocks in the stencil kernel...
    for label, block in stencil_blocks.items():
        new_body = []
        # For all statements in those blocks...
        for stmt in block.body:
            # Reject assignments to input arrays.
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in in_arg_names) or
                    ((isinstance(stmt, ir.SetItem)
                      or isinstance(stmt, ir.StaticSetItem))
                     and stmt.target.name in in_arg_names)):
                raise ValueError(
                    "Assignments to arrays passed to stencil kernels is not allowed.")
            # We found a getitem for some array. If that array is an input
            # array and isn't in the list of standard indexed arrays then
            # update min and max seen indices if we are inferring the
            # kernel size and create a new tuple where the relative offsets
            # are added to loop index vars to get standard indexing.
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['static_getitem', 'getitem']
                    and stmt.value.value.name in in_arg_names
                    and stmt.value.value.name not in standard_indexed):
                index_list = stmt.value.index
                # handle 1D case
                if ndims == 1:
                    index_list = [index_list]
                else:
                    # A multi-dimensional index arrives as a tuple var;
                    # expand it to its element vars via the tuple table.
                    if hasattr(index_list, 'name') and index_list.name in tuple_table:
                        index_list = tuple_table[index_list.name]
                # indices can be inferred as constant in simple expressions
                # like -c where c is constant
                # handled here since this is a common stencil index pattern
                stencil_ir._definitions = ir_utils.build_definitions(
                    stencil_blocks)
                index_list = [_get_const_index_expr(
                    stencil_ir, self.func_ir, v) for v in index_list]
                if index_offsets:
                    index_list = self._add_index_offsets(index_list,
                                list(index_offsets), new_body, scope, loc)

                # update min and max indices
                if need_to_calc_kernel:
                    # all indices should be integer to be able to calculate
                    # neighborhood automatically
                    if (isinstance(index_list, ir.Var) or
                        any([not isinstance(v, int) for v in index_list])):
                        raise ValueError("Variable stencil index only "
                                         "possible with known neighborhood")
                    start_lengths = list(map(min, start_lengths,
                                             index_list))
                    end_lengths = list(map(max, end_lengths, index_list))
                    found_relative_index = True

                # update access indices
                index_vars = self._add_index_offsets(parfor_vars,
                            list(index_list), new_body, scope, loc)

                # new access index tuple
                if ndims == 1:
                    ind_var = index_vars[0]
                else:
                    # Build a fresh UniTuple var from the shifted indices.
                    ind_var = ir.Var(scope, mk_unique_var(
                        "$parfor_index_ind_var"), loc)
                    self.typemap[ind_var.name] = types.containers.UniTuple(
                        types.intp, ndims)
                    tuple_call = ir.Expr.build_tuple(index_vars, loc)
                    tuple_assign = ir.Assign(tuple_call, ind_var, loc)
                    new_body.append(tuple_assign)

                # getitem return type is scalar if all indices are integer
                if all([self.typemap[v.name] == types.intp
                        for v in index_vars]):
                    getitem_return_typ = self.typemap[
                        stmt.value.value.name].dtype
                else:
                    # getitem returns an array
                    getitem_return_typ = self.typemap[
                        stmt.value.value.name]
                # new getitem with the new index var
                getitem_call = ir.Expr.getitem(stmt.value.value, ind_var,
                                               loc)
                self.calltypes[getitem_call] = signature(
                    getitem_return_typ,
                    self.typemap[stmt.value.value.name],
                    self.typemap[ind_var.name])
                stmt.value = getitem_call

            new_body.append(stmt)
        block.body = new_body
    if need_to_calc_kernel and not found_relative_index:
        raise ValueError("Stencil kernel with no accesses to " \
            "relatively indexed arrays.")

    return start_lengths, end_lengths
def _init_run(self):
    """Reset per-run analysis state before processing the IR."""
    # Definitions must reflect the current blocks before analysis starts.
    self.func_ir._definitions = build_definitions(self.func_ir.blocks)
    # Fresh bookkeeping for this run.
    # NOTE(review): _T_arrs presumably tracks transposed arrays and
    # _parallel_accesses tracks parallel-safe accesses — confirm against
    # the rest of the class.
    self._parallel_accesses, self._T_arrs = set(), set()
    self.second_pass = False
    self.in_parallel_parfor = -1
def _rewrite_return(func_ir, target_block_label): """Rewrite a return block inside a with statement. Arguments --------- func_ir: Function IR the CFG to transform target_block_label: int the block index/label of the block containing the POP_BLOCK statement This implements a CFG transformation to insert a block between two other blocks. The input situation is: ┌───────────────┐ │ top │ │ POP_BLOCK │ │ bottom │ └───────┬───────┘ │ ┌───────▼───────┐ │ │ │ RETURN │ │ │ └───────────────┘ If such a pattern is detected in IR, it means there is a `return` statement within a `with` context. The basic idea is to rewrite the CFG as follows: ┌───────────────┐ │ top │ │ POP_BLOCK │ │ │ └───────┬───────┘ │ ┌───────▼───────┐ │ │ │ bottom │ │ │ └───────┬───────┘ │ ┌───────▼───────┐ │ │ │ RETURN │ │ │ └───────────────┘ We split the block that contains the `POP_BLOCK` statement into two blocks. Everything from the beginning of the block up to and including the `POP_BLOCK` statement is considered the 'top' and everything below is considered 'bottom'. Finally the jump statements are re-wired to make sure the CFG remains valid. 
""" # the block itself from the index target_block = func_ir.blocks[target_block_label] # get the index of the block containing the return target_block_successor_label = target_block.terminator.get_targets()[0] # the return block target_block_successor = func_ir.blocks[target_block_successor_label] # create the new return block with an appropriate label max_label = ir_utils.find_max_label(func_ir.blocks) new_label = max_label + 1 # create the new return block new_block_loc = target_block_successor.loc new_block_scope = ir.Scope(None, loc=new_block_loc) new_block = ir.Block(new_block_scope, loc=new_block_loc) # Split the block containing the POP_BLOCK into top and bottom # Block must be of the form: # ----------------- # <some stmts> # POP_BLOCK # <some more stmts> # JUMP # ----------------- top_body, bottom_body = [], [] pop_blocks = [*target_block.find_insts(ir.PopBlock)] assert len(pop_blocks) == 1 assert len([*target_block.find_insts(ir.Jump)]) == 1 assert isinstance(target_block.body[-1], ir.Jump) pb_marker = pop_blocks[0] pb_is = target_block.body.index(pb_marker) top_body.extend(target_block.body[:pb_is]) top_body.append(ir.Jump(target_block_successor_label, target_block.loc)) bottom_body.extend(target_block.body[pb_is:-1]) bottom_body.append(ir.Jump(new_label, target_block.loc)) # get the contents of the return block return_body = func_ir.blocks[target_block_successor_label].body # finally, re-assign all blocks new_block.body.extend(return_body) target_block_successor.body.clear() target_block_successor.body.extend(bottom_body) target_block.body.clear() target_block.body.extend(top_body) # finally, append the new return block and rebuild the IR properties func_ir.blocks[new_label] = new_block func_ir._definitions = ir_utils.build_definitions(func_ir.blocks) return func_ir