def _inline_closure(self, work_list, block, i, func_def):
    """Inline the closure ``func_def`` at statement ``i`` of ``block``.

    The guard fails (``require`` raises GuardException) unless
    ``func_def`` is a ``make_function`` expression.  Returns True once
    ``inline_closure_call`` has been invoked.
    """
    is_closure = (isinstance(func_def, ir.Expr)
                  and func_def.op == "make_function")
    require(is_closure)
    # The closure is inlined using the globals of the function being compiled.
    caller_globals = self.func_ir.func_id.func.__globals__
    inline_closure_call(self.func_ir, caller_globals, block, i, func_def,
                        work_list=work_list)
    return True
def _get_pair_first_container(func_ir, rhs):
    """Return the container iterated by the loop that produced ``rhs``.

    ``rhs`` must be a ``pair_first`` expression.  Its value is followed
    back through an ``iternext`` expression and then a ``getiter``
    expression; the value of that ``getiter`` is returned.

    Raises GuardException (via ``require``) when either definition in
    the chain does not have the expected form.
    """
    assert isinstance(rhs, ir.Expr) and rhs.op == 'pair_first'
    iternext = get_definition(func_ir, rhs.value)
    require(isinstance(iternext, ir.Expr) and iternext.op == 'iternext')
    getiter = get_definition(func_ir, iternext.value)
    # The type check must be on ``getiter`` itself: checking ``iternext``
    # again would let a non-Expr definition through and fail with an
    # AttributeError on ``.op`` instead of a GuardException.
    require(isinstance(getiter, ir.Expr) and getiter.op == 'getiter')
    return getiter.value
def _inline_reduction(self, work_list, block, i, expr, call_name):
    """Inline a three-argument ``reduce`` call as an explicit loop.

    The guard fails (GuardException via ``require``) when
    ``self.flags.auto_parallel`` equals True or when ``call_name`` is not
    ``reduce`` from ``builtins`` / ``_functools``.  A ``reduce`` call
    without exactly three arguments raises TypeError.  Returns True after
    the call at statement ``i`` of ``block`` has been inlined.
    """
    # Only inline reductions for sequential execution; the parallel case
    # is handled in ParforPass.
    require(self.flags.auto_parallel != True)
    reduce_names = (('reduce', 'builtins'), ('reduce', '_functools'))
    require(call_name in reduce_names)

    nargs = len(expr.args)
    if nargs != 3:
        raise TypeError("invalid reduce call, "
                        "three arguments including initial "
                        "value required")
    check_reduce_func(self.func_ir, expr.args[0])

    # This function is the code that gets inlined, so it is kept exactly
    # as written.
    def reduce_func(f, A, v):
        s = v
        it = iter(A)
        for a in it:
            s = f(s, a)
        return s

    caller_globals = self.func_ir.func_id.func.__globals__
    inline_closure_call(self.func_ir, caller_globals, block, i, reduce_func,
                        work_list=work_list)
    return True
def _find_unsafe_empty_inferred(func_ir, expr):
    """Tell whether ``expr`` is a call whose callee is ``unsafe_empty_inferred``.

    Raises GuardException (via ``require``) when ``expr`` is not a call
    expression or when the callee is not defined by an ``ir.Global``.
    Otherwise returns the result of comparing the global's value with
    ``unsafe_empty_inferred``.
    """
    # Bare reference kept deliberately: the name is evaluated before any
    # guard check runs.
    unsafe_empty_inferred
    is_call = isinstance(expr, ir.Expr) and expr.op == 'call'
    require(is_call)
    callee_def = get_definition(func_ir, expr.func)
    require(isinstance(callee_def, ir.Global))
    debug_print = _make_debug_print("_find_unsafe_empty_inferred")
    debug_print(callee_def.value)
    return callee_def.value == unsafe_empty_inferred
def _get_const_binary_expr(stencil_ir, func_ir, index_def):
    """Evaluate a constant binary expression if possible.

    ``index_def`` must be a ``binop`` expression whose two operands can
    both be resolved to constants by ``_get_const_index_expr_inner``.
    The operator text is taken from ``index_def.fn``.

    Returns the evaluated constant; raises GuardException otherwise.
    """
    require(isinstance(index_def, ir.Expr) and index_def.op == 'binop')
    arg1 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.lhs)
    arg2 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.rhs)
    # Both operands are parenthesised so that a negative constant keeps its
    # sign under operator precedence: without the parentheses, lhs == -1
    # with ``**`` would be evaluated as -(1**2) rather than (-1)**2.
    # NOTE(review): eval() is applied to text built from IR constants; this
    # is only safe as long as those constants never carry untrusted text.
    return eval("({}){}({})".format(arg1, index_def.fn, arg2))
def _get_const_binary_expr(stencil_ir, func_ir, index_def):
    """Evaluate a constant binary expression if possible.

    ``index_def`` must be a ``binop`` expression whose two operands can
    both be resolved to constants by ``_get_const_index_expr_inner``.
    The operator text is looked up in ``OPERATORS_TO_BUILTINS`` using
    ``index_def.fn`` as the key.

    Returns the evaluated constant; raises GuardException otherwise.
    """
    require(isinstance(index_def, ir.Expr) and index_def.op == 'binop')
    arg1 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.lhs)
    arg2 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.rhs)
    op = OPERATORS_TO_BUILTINS[index_def.fn]
    # Both operands are parenthesised so that a negative constant keeps its
    # sign under operator precedence: without the parentheses, lhs == -1
    # with ``**`` would be evaluated as -(1**2) rather than (-1)**2.
    # NOTE(review): eval() is applied to text built from IR constants; this
    # is only safe as long as those constants never carry untrusted text.
    return eval("({}){}({})".format(arg1, op, arg2))
def _fix_stencil_index_offsets(self, options):
    """Replace ``options['index_offsets']`` with a plain tuple.

    The current entry is resolved to its definition in the program IR,
    and the definition's ``items`` are stored back as a tuple so they can
    be handed to StencilFunc.  Raises GuardException (via ``require``)
    when the definition has no ``items`` attribute; returns True on
    success.
    """
    key = 'index_offsets'
    offsets_def = get_definition(self.func_ir, options[key])
    has_items = hasattr(offsets_def, 'items')
    require(has_items)
    options[key] = tuple(offsets_def.items)
    return True
def _get_const_unary_expr(stencil_ir, func_ir, index_def):
    """Evaluate a constant unary expression if possible.

    ``index_def`` must be a ``unary`` expression whose operand can be
    resolved to a constant by ``_get_const_index_expr_inner``.  The
    operator text is taken from ``index_def.fn``.

    Returns the evaluated constant (e.g. ``-c``); raises GuardException
    otherwise.
    """
    require(isinstance(index_def, ir.Expr) and index_def.op == 'unary')
    inner_var = index_def.value
    const_val = _get_const_index_expr_inner(stencil_ir, func_ir, inner_var)
    # The operand is parenthesised so the operator and the constant cannot
    # fuse into a single token: "not" followed directly by "1" would
    # otherwise be read as the name ``not1``.
    # NOTE(review): eval() is applied to text built from IR constants; this
    # is only safe as long as those constants never carry untrusted text.
    return eval("{}({})".format(index_def.fn, const_val))
def _get_str_contains_col(self, func_def):
    """Return the column variable behind a ``<col>.str.contains`` lookup.

    ``func_def`` must be a ``getattr`` of ``contains`` whose base is
    itself a ``getattr`` of ``str``, and the name of the innermost value
    must be in ``self.df_cols``.  Raises GuardException (via ``require``)
    when any of these conditions fails.
    """
    def is_getattr_of(node, attr):
        # True when ``node`` is a getattr expression for attribute ``attr``.
        return (isinstance(node, ir.Expr) and node.op == 'getattr'
                and node.attr == attr)

    require(is_getattr_of(func_def, 'contains'))
    str_def = get_definition(self.func_ir, func_def.value)
    require(is_getattr_of(str_def, 'str'))
    col = str_def.value
    require(col.name in self.df_cols)
    return col
def _get_const_unary_expr(stencil_ir, func_ir, index_def):
    """Evaluate a constant unary expression if possible.

    ``index_def`` must be a ``unary`` expression whose operand can be
    resolved to a constant by ``_get_const_index_expr_inner``.  The
    operator text is looked up in ``OPERATORS_TO_BUILTINS`` using
    ``index_def.fn`` as the key.

    Returns the evaluated constant (e.g. ``-c``); raises GuardException
    otherwise.
    """
    require(isinstance(index_def, ir.Expr) and index_def.op == 'unary')
    inner_var = index_def.value
    const_val = _get_const_index_expr_inner(stencil_ir, func_ir, inner_var)
    op = OPERATORS_TO_BUILTINS[index_def.fn]
    # The operand is parenthesised so the operator and the constant cannot
    # fuse into a single token: "not" followed directly by "1" would
    # otherwise be read as the name ``not1``.
    # NOTE(review): eval() is applied to text built from IR constants; this
    # is only safe as long as those constants never carry untrusted text.
    return eval("{}({})".format(op, const_val))
def _infer_h5_typ(self, rhs):
    """Infer the array type of an HDF5 read such as ``f['A']['B'][:]``
    or ``f['A'][b,:]`` when the file name is constant.

    ``rhs`` must be a ``getitem`` / ``static_getitem`` whose index is not
    a constant string.  The chain of definitions behind ``rhs.value`` is
    then walked, collecting the constant-string index of every
    intermediate getitem, until a call expression is reached; that call
    and the collected names are passed to ``self._get_h5_type_file``.

    Raises GuardException (via ``require``) when the pattern does not
    match.
    """
    getitem_ops = ('getitem', 'static_getitem')

    def index_of(node):
        # TODO (from original): static_getitem has index_var for sure?
        return node.index if node.op == 'getitem' else node.index_var

    require(rhs.op in getitem_ops)
    # The type of the index cannot be known here (it may be a bool array),
    # so only make sure it is not a string, i.e. that we are not in the
    # middle of a select chain.
    # TODO (from original): support non-slice indices like integers.
    index_val = guard(find_const, self.func_ir, index_of(rhs))
    require(not isinstance(index_val, str))

    # Collect object names until the call that opens the file is reached.
    names = []
    node = rhs
    while True:
        node = get_definition(self.func_ir, node.value)
        require(isinstance(node, ir.Expr))
        if node.op == 'call':
            return self._get_h5_type_file(node, names)
        # Each object name has to be a constant string.
        require(node.op in getitem_ops)
        names.append(find_str_const(self.func_ir, index_of(node)))
def _fix_stencil_neighborhood(self, options):
    """Replace ``options['neighborhood']`` with a two-level plain tuple.

    The current entry is resolved in the program IR to a node with
    ``items`` (one per dimension); each item is resolved again to a node
    with ``items`` (the window for that dimension).  The result is stored
    back as a tuple of tuples for StencilFunc.

    Raises GuardException (via ``require``) when any resolved node has no
    ``items`` attribute, in which case ``options`` is left untouched.
    Returns True on success.
    """
    def items_of(var):
        # Resolve ``var`` and insist the definition exposes ``items``.
        node = get_definition(self.func_ir, var)
        require(hasattr(node, 'items'))
        return node.items

    per_dim_vars = items_of(options['neighborhood'])
    windows = tuple(tuple(items_of(window_var)) for window_var in per_dim_vars)
    options['neighborhood'] = windows
    return True
def _find_arraycall(func_ir, block):
    """Scan ``block`` for "x = numpy.array(y)" or "x[..] = y".

    Statements are visited in order.  ``ir.Del`` statements are skipped
    unless they delete the list variable after an array statement has
    been found, which ends the scan successfully.  Any statement that is
    not a Del, an Assign, or a qualifying SetItem ends the scan.

    Returns ``(list_var, array_stmt_index, array_kws)``: the list
    variable, the index of the array statement within ``block.body``,
    and the keyword arguments of the array call as a dict (empty for the
    SetItem form).  Raises GuardException if no array statement is found
    or the list variable is not deleted after it.
    """
    array_var = None
    list_var = None
    list_dead_after_call = False

    for idx, instr in enumerate(block.body):
        if isinstance(instr, ir.Del):
            # Stop the process once list_var becomes dead.
            if list_var and array_var and instr.value == list_var.name:
                list_dead_after_call = True
                break
        elif isinstance(instr, ir.Assign):
            call_expr = instr.value
            is_np_array = (
                guard(find_callname, func_ir, call_expr) == ('array', 'numpy')
                and isinstance(call_expr.args[0], ir.Var))
            if is_np_array:
                # Found array_var = array(list_var)
                list_var = call_expr.args[0]
                array_var = instr.target
                array_stmt_index = idx
                array_kws = dict(call_expr.kws)
        elif (isinstance(instr, ir.SetItem)
              and isinstance(instr.value, ir.Var)
              and not list_var):
            # Found array_var[..] = list_var, the case for nested arrays.
            list_var = instr.value
            array_var = instr.target
            array_def = get_definition(func_ir, array_var)
            require(guard(_find_unsafe_empty_inferred, func_ir, array_def))
            array_stmt_index = idx
            array_kws = {}
        else:
            # Bail out on anything else.
            break

    # array_var must have been found, and list_var must be dead afterwards.
    require(array_var and list_dead_after_call)
    _make_debug_print("find_array_call")(block.body[array_stmt_index])
    return list_var, array_stmt_index, array_kws
def is_whole_slice(typemap, func_ir, var):
    """Return True if ``var`` can be determined to be a whole slice.

    ``var`` must have type ``types.slice2_type`` and be defined by a call
    whose two arguments are both the constant None.  Raises
    GuardException (via ``require``) otherwise.  The call is asserted to
    be a two-argument call to the builtin ``slice``.
    """
    require(typemap[var.name] == types.slice2_type)
    call_expr = get_definition(func_ir, var)
    require(isinstance(call_expr, ir.Expr) and call_expr.op == 'call')
    assert len(call_expr.args) == 2
    assert find_callname(func_ir, call_expr) == ('slice', 'builtins')
    arg0_def = get_definition(func_ir, call_expr.args[0])
    arg1_def = get_definition(func_ir, call_expr.args[1])
    # Identity comparison: ``== None`` would invoke the constant's own
    # ``__eq__`` (elementwise for array-like values) instead of testing
    # for None itself.
    require(isinstance(arg0_def, ir.Const) and arg0_def.value is None)
    require(isinstance(arg1_def, ir.Const) and arg1_def.value is None)
    return True
def _get_const_index_expr_inner(stencil_ir, func_ir, index_var):
    """Inner constant inference: try the constant, unary and binary cases.

    Returns the first non-None constant produced by, in order,
    ``_get_const_two_irs`` on ``index_var`` and then the unary and binary
    matchers on the definition of ``index_var`` in ``stencil_ir``.
    Raises GuardException if ``index_var`` is not an ``ir.Var`` or none
    of the cases yields a constant.
    """
    require(isinstance(index_var, ir.Var))

    # Case where the index is itself a constant in the outer function.
    direct = guard(_get_const_two_irs, stencil_ir, func_ir, index_var)
    if direct is not None:
        return direct

    index_def = ir_utils.get_definition(stencil_ir, index_var)
    # First match inner_var = unary(index_var), then inner_var = arg1 + arg2.
    for matcher in (_get_const_unary_expr, _get_const_binary_expr):
        found = guard(matcher, stencil_ir, func_ir, index_def)
        if found is not None:
            return found
    raise GuardException
def _inline_reduction(self, work_list, block, i, expr, call_name):
    """Inline a three-argument ``reduce`` call as an explicit loop.

    The guard fails (GuardException via ``require``) when
    ``self.parallel_options.reduction`` is truthy or when ``call_name``
    is not ``reduce`` from ``builtins`` / ``_functools``.  A ``reduce``
    call without exactly three arguments raises TypeError.  Returns True
    after the call at statement ``i`` of ``block`` has been inlined.
    """
    # Only inline reductions for sequential execution; the parallel case
    # is handled in ParforPass.
    require(not self.parallel_options.reduction)
    reduce_names = (('reduce', 'builtins'), ('reduce', '_functools'))
    require(call_name in reduce_names)

    nargs = len(expr.args)
    if nargs != 3:
        raise TypeError("invalid reduce call, "
                        "three arguments including initial "
                        "value required")
    check_reduce_func(self.func_ir, expr.args[0])

    # This function is the code that gets inlined, so it is kept exactly
    # as written.
    def reduce_func(f, A, v):
        s = v
        it = iter(A)
        for a in it:
            s = f(s, a)
        return s

    caller_globals = self.func_ir.func_id.func.__globals__
    inline_closure_call(self.func_ir, caller_globals, block, i, reduce_func,
                        work_list=work_list)
    return True
def is_const_slice(typemap, func_ir, var, accept_stride=False):
    """Return True if ``var`` can be determined to be a constant-size slice.

    ``var`` must have type ``types.slice2_type`` (or ``slice3_type`` when
    ``accept_stride`` is set) and be defined by a call whose first
    argument is the constant None and whose second argument resolves to
    an int constant.  Raises GuardException (via ``require``) otherwise.
    The call is asserted to be a call to the builtin ``slice`` with two
    arguments (three allowed with ``accept_stride``).
    """
    slice_typ = typemap[var.name]
    require(slice_typ == types.slice2_type
            or (accept_stride and slice_typ == types.slice3_type))

    call_expr = get_definition(func_ir, var)
    require(isinstance(call_expr, ir.Expr) and call_expr.op == 'call')
    nargs = len(call_expr.args)
    assert (nargs == 2 or (accept_stride and nargs == 3))
    assert find_callname(func_ir, call_expr) == ('slice', 'builtins')

    start_def = get_definition(func_ir, call_expr.args[0])
    require(isinstance(start_def, ir.Const) and start_def.value is None)
    stop_const = find_const(func_ir, call_expr.args[1])
    require(isinstance(stop_const, int))
    return True
def _find_iter_range(func_ir, range_iter_var):
    """Find the iterator's actual range if it is ``range(n)`` or
    ``range(m, n)``; otherwise raise GuardException.

    Returns ``(start, stop, func_def)`` where ``func_def`` is the
    ``ir.Global`` that defines ``range``.  For ``range(n)`` the start is
    the literal 0 and the stop is the call argument itself; for
    ``range(m, n)`` both are the ``lhs_only`` definitions of the
    arguments.
    """
    debug_print = _make_debug_print("find_iter_range")

    iter_def = get_definition(func_ir, range_iter_var)
    debug_print("range_iter_var = ", range_iter_var, " def = ", iter_def)
    require(isinstance(iter_def, ir.Expr) and iter_def.op == 'getiter')

    range_var = iter_def.value
    range_def = get_definition(func_ir, range_var)
    debug_print("range_var = ", range_var, " range_def = ", range_def)
    require(isinstance(range_def, ir.Expr) and range_def.op == 'call')

    func_var = range_def.func
    func_def = get_definition(func_ir, func_var)
    debug_print("func_var = ", func_var, " func_def = ", func_def)
    require(isinstance(func_def, ir.Global) and func_def.value == range)

    args = range_def.args
    if len(args) == 1:
        # The lookup result is not used, but the call is kept because it
        # still runs (and can fail) before the value is returned.
        # NOTE(review): the two-argument branch returns the looked-up
        # definition instead of the raw argument - confirm this is intended.
        get_definition(func_ir, args[0], lhs_only=True)
        return (0, args[0], func_def)
    if len(args) == 2:
        start = get_definition(func_ir, args[0], lhs_only=True)
        stop = get_definition(func_ir, args[1], lhs_only=True)
        return (start, stop, func_def)
    raise GuardException
def inline_array(array_var, expr, stmts, list_vars, dels):
    """Check whether ``array_var`` is created from a list of constants and,
    if so, inline the list definition as array initialization.

    ``expr`` must be a ``numpy.array`` call on a variable named in
    ``list_vars`` whose typed result is a one-dimensional
    ``types.ArrayCompatible``.  The replacement statements (size
    definitions, an ``np.empty`` call, one setitem per element, then
    ``dels``) are appended to ``stmts``.

    Returns True on success; raises GuardException when the pattern does
    not match.
    """
    callname = guard(find_callname, func_ir, expr)
    require(callname and callname[1] == 'numpy' and callname[0] == 'array')
    require(expr.args[0].name in list_vars)
    ret_type = calltypes[expr].return_type
    require(isinstance(ret_type, types.ArrayCompatible) and ret_type.ndim == 1)

    loc = expr.loc
    list_var = expr.args[0]
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    seq, _ = find_build_sequence(func_ir, list_var)
    size = len(seq)

    def define(target, value):
        # Append "target = value" and record it as a new definition.
        stmts.append(_new_definition(func_ir, target, value, loc))

    # Variables holding the array size and a one-element size tuple.
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    size_typ = types.intp
    typemap[size_var.name] = size_typ
    typemap[size_tuple_var.name] = types.UniTuple(size_typ, 1)
    define(size_var, ir.Const(size, loc=loc))
    define(size_tuple_var, ir.Expr.build_tuple(items=[size_var], loc=loc))

    # Variable holding the numpy empty function.
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature is not used afterwards; the call is kept.
    context.resolve_function_type(fnty, (size_typ,), {})
    typemap[empty_func.name] = fnty
    define(empty_func, ir.Global('empty', np.empty, loc=loc))

    # array_var = empty(size)
    empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
    calltypes[empty_call] = typing.signature(array_typ, size_typ)
    define(array_var, empty_call)

    # Fill the new array one element at a time.
    index_typ = types.intp
    for position, item in enumerate(seq):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = index_typ
        define(index_var, ir.Const(position, loc))
        setitem = ir.SetItem(array_var, index_var, item, loc)
        calltypes[setitem] = typing.signature(types.none, array_typ,
                                              index_typ, elem_typ)
        stmts.append(setitem)

    stmts.extend(dels)
    return True
def _get_const_index_expr_inner(stencil_ir, func_ir, index_var):
    """Inner constant inference: try the constant, unary and binary cases.

    Returns the first non-None constant produced by, in order,
    ``_get_const_two_irs`` on ``index_var`` and then the unary and binary
    matchers on the definition of ``index_var`` in ``stencil_ir``.
    Raises GuardException if ``index_var`` is not an ``ir.Var`` or none
    of the cases yields a constant.
    """
    require(isinstance(index_var, ir.Var))

    # Case where the index is itself a constant in the outer function.
    direct = guard(_get_const_two_irs, stencil_ir, func_ir, index_var)
    if direct is not None:
        return direct

    index_def = ir_utils.get_definition(stencil_ir, index_var)
    # First match inner_var = unary(index_var), then inner_var = arg1 + arg2.
    for matcher in (_get_const_unary_expr, _get_const_binary_expr):
        found = guard(matcher, stencil_ir, func_ir, index_def)
        if found is not None:
            return found
    raise GuardException
def _get_h5_type_file(self, val_def, obj_name_list):
    """Open the HDF5 file named in ``val_def`` and return the Numba array
    type of the dataset reached through ``obj_name_list``.

    ``val_def`` must be a call to ``h5py.File`` whose first argument is a
    constant string.  ``obj_name_list`` holds the object names collected
    from the outermost getitem inwards; it is reversed in place here and
    then followed from the file root.

    Returns ``types.Array(dtype, ndims, 'C')`` for the dataset.  Raises
    GuardException (via ``require``) when the call does not match, the
    list is empty, or the object reached is not an ``h5py.Dataset``.
    """
    require(len(obj_name_list) > 0)
    require(find_callname(self.func_ir, val_def) == ('File', 'h5py'))
    require(len(val_def.args) > 0)
    f_name = find_str_const(self.func_ir, val_def.args[0])
    obj_name_list.reverse()

    import h5py
    f = h5py.File(f_name, 'r')
    # try/finally so the file is closed even when a name lookup fails or
    # the guard below rejects the object; previously the handle leaked on
    # every such path.
    try:
        obj = f
        for obj_name in obj_name_list:
            obj = obj[obj_name]
        require(isinstance(obj, h5py.Dataset))
        ndims = len(obj.shape)
        numba_dtype = numba.numpy_support.from_dtype(obj.dtype)
    finally:
        f.close()
    return types.Array(numba_dtype, ndims, 'C')
def find_build_tuple(func_ir, var):
    """Return the items of the ``build_tuple`` expression that defines
    ``var``, or raise GuardException if ``var`` is not built that way.

    ``var`` may be a variable or a variable name.
    """
    require(isinstance(var, (ir.Var, str)))
    tuple_def = get_definition(func_ir, var)
    is_build_tuple = (isinstance(tuple_def, ir.Expr)
                      and tuple_def.op == 'build_tuple')
    require(is_build_tuple)
    return tuple_def.items
def find_build_sequence(func_ir, var):
    """Reimplemented from numba.ir_utils.find_build_sequence, with
    'build_map' added to the accepted build operations.

    Returns ``(items, op)`` of the build expression that defines ``var``;
    raises GuardException (via ``require``) when ``var`` is not an
    ``ir.Var`` or is not defined by one of the accepted build operations.
    """
    from numba.ir_utils import (require, get_definition)

    accepted_ops = ('build_tuple', 'build_list', 'build_set', 'build_map')
    require(isinstance(var, ir.Var))
    seq_def = get_definition(func_ir, var)
    require(isinstance(seq_def, ir.Expr))
    require(seq_def.op in accepted_ops)
    return seq_def.items, seq_def.op
def _inline_stencil(self, instr, call_name, func_def):
    """Handle a stencil-related call in the assignment ``instr``.

    Two cases are recognised:

    * ``func_def`` is already an ``ir.Global`` named 'stencil' holding a
      StencilFunc: its saved ``kws`` are attached to the call.
    * the call is ``numba.stencil(kernel, ...)``: a StencilFunc is built
      from the kernel's IR and the call options, and ``instr.value`` is
      replaced by an ``ir.Global`` wrapping it.

    Returns True in both cases.  Raises GuardException (via ``require``)
    when neither applies, and ValueError for a wrong argument count or
    for ``neighborhood`` / ``index_offsets`` options that cannot be
    turned into constant tuples.
    """
    from numba.stencil import StencilFunc

    lhs = instr.target
    expr = instr.value

    # Escaping variables of the stencil kernel are kept alive by adding
    # them to the actual kernel call as extra keyword arguments, which
    # are ignored anyway.
    already_stencil = (isinstance(func_def, ir.Global)
                       and func_def.name == 'stencil'
                       and isinstance(func_def.value, StencilFunc))
    if already_stencil:
        if expr.kws:
            expr.kws += func_def.value.kws
        else:
            expr.kws = func_def.value.kws
        return True

    # Otherwise check whether this is a call to numba.stencil.
    stencil_names = (('stencil', 'numba.stencil'), ('stencil', 'numba'))
    require(call_name in stencil_names)
    require(expr not in self._processed_stencils)
    self._processed_stencils.append(expr)
    if len(expr.args) != 1:
        raise ValueError("As a minimum Stencil requires"
                         " a kernel as an argument")

    stencil_def = guard(get_definition, self.func_ir, expr.args[0])
    require(isinstance(stencil_def, ir.Expr)
            and stencil_def.op == "make_function")
    caller_globals = self.func_ir.func_id.func.__globals__
    kernel_ir = get_ir_of_code(caller_globals, stencil_def.code)

    options = dict(expr.kws)
    if 'neighborhood' in options:
        if not guard(self._fix_stencil_neighborhood, options):
            raise ValueError("stencil neighborhood option should be a tuple"
                             " with constant structure such as ((-w, w),)")
    if 'index_offsets' in options:
        if not guard(self._fix_stencil_index_offsets, options):
            raise ValueError("stencil index_offsets option should be a tuple"
                             " with constant structure such as (offset, )")

    sf = StencilFunc(kernel_ir, 'constant', options)
    sf.kws = expr.kws  # hack to keep variables live
    sf_global = ir.Global('stencil', sf, expr.loc)
    self.func_ir._definitions[lhs.name] = [sf_global]
    instr.value = sf_global
    return True
def find_str_const(func_ir, var):
    """Return the string constant that ``var`` can be inferred to hold,
    or raise GuardException otherwise.

    Two forms are supported: a direct ``ir.Const`` holding a str, and a
    ``binop`` using ``operator.add`` whose two operands are themselves
    string constants (resolved recursively and concatenated).
    """
    require(isinstance(var, ir.Var))
    var_def = get_definition(func_ir, var)

    if not isinstance(var_def, ir.Const):
        # Only (s1 + s2) is supported. TODO: extend to other expressions.
        is_str_add = (isinstance(var_def, ir.Expr)
                      and var_def.op == 'binop'
                      and var_def.fn == operator.add)
        require(is_str_add)
        left = find_str_const(func_ir, var_def.lhs)
        right = find_str_const(func_ir, var_def.rhs)
        return left + right

    val = var_def.value
    require(isinstance(val, str))
    return val
def _inline_stencil(self, instr, call_name, func_def):
    """Handle a stencil-related call in the assignment ``instr``.

    Two cases are recognised:

    * ``func_def`` is already an ``ir.Global`` named 'stencil' holding a
      StencilFunc: its saved ``kws`` are attached to the call.
    * the call is ``numba.stencil(kernel, ...)``: a StencilFunc is built
      from the kernel's IR and the call options, and ``instr.value`` is
      replaced by an ``ir.Global`` wrapping it.

    Returns True in both cases.  Raises GuardException (via ``require``)
    when neither applies, and ValueError for a wrong argument count or
    for ``neighborhood`` / ``index_offsets`` options that cannot be
    turned into constant tuples.
    """
    from numba.stencil import StencilFunc

    lhs = instr.target
    expr = instr.value

    # Escaping variables of the stencil kernel are kept alive by adding
    # them to the actual kernel call as extra keyword arguments, which
    # are ignored anyway.
    already_stencil = (isinstance(func_def, ir.Global)
                       and func_def.name == 'stencil'
                       and isinstance(func_def.value, StencilFunc))
    if already_stencil:
        if expr.kws:
            expr.kws += func_def.value.kws
        else:
            expr.kws = func_def.value.kws
        return True

    # Otherwise check whether this is a call to numba.stencil.
    stencil_names = (('stencil', 'numba.stencil'), ('stencil', 'numba'))
    require(call_name in stencil_names)
    require(expr not in self._processed_stencils)
    self._processed_stencils.append(expr)
    if len(expr.args) != 1:
        raise ValueError("As a minimum Stencil requires"
                         " a kernel as an argument")

    stencil_def = guard(get_definition, self.func_ir, expr.args[0])
    require(isinstance(stencil_def, ir.Expr)
            and stencil_def.op == "make_function")
    caller_globals = self.func_ir.func_id.func.__globals__
    kernel_ir = get_ir_of_code(caller_globals, stencil_def.code)

    options = dict(expr.kws)
    if 'neighborhood' in options:
        if not guard(self._fix_stencil_neighborhood, options):
            raise ValueError("stencil neighborhood option should be a tuple"
                             " with constant structure such as ((-w, w),)")
    if 'index_offsets' in options:
        if not guard(self._fix_stencil_index_offsets, options):
            raise ValueError("stencil index_offsets option should be a tuple"
                             " with constant structure such as (offset, )")

    sf = StencilFunc(kernel_ir, 'constant', options)
    sf.kws = expr.kws  # hack to keep variables live
    sf_global = ir.Global('stencil', sf, expr.loc)
    self.func_ir._definitions[lhs.name] = [sf_global]
    instr.value = sf_global
    return True
def _infer_h5_typ(self, rhs):
    """Infer the array type of an HDF5 read such as ``f['A']['B'][:]``
    when the file name is constant.

    ``rhs`` must be a ``getitem`` / ``static_getitem`` whose index is
    defined by a call to the builtin ``slice``.  The chain of definitions
    behind ``rhs.value`` is then walked, collecting the constant-string
    index of every intermediate getitem, until a call expression is
    reached; that call and the collected names are passed to
    ``self._get_h5_type_file``.

    Raises GuardException (via ``require``) when the pattern does not
    match.
    """
    getitem_ops = ('getitem', 'static_getitem')

    def index_of(node):
        # TODO (from original): static_getitem has index_var for sure?
        return node.index if node.op == 'getitem' else node.index_var

    require(rhs.op in getitem_ops)
    # Make sure the index is a slice.
    # TODO (from original): support non-slice indices like integers.
    index_def = get_definition(self.func_ir, index_of(rhs))
    require(isinstance(index_def, ir.Expr) and index_def.op == 'call')
    require(find_callname(self.func_ir, index_def) == ('slice', 'builtins'))

    # Collect object names until the call that opens the file is reached.
    names = []
    node = rhs
    while True:
        node = get_definition(self.func_ir, node.value)
        require(isinstance(node, ir.Expr))
        if node.op == 'call':
            return self._get_h5_type_file(node, names)
        # Each object name has to be a constant string.
        require(node.op in getitem_ops)
        name = find_const(self.func_ir, index_of(node))
        require(isinstance(name, str))
        names.append(name)
def inline_array(array_var, expr, stmts, list_vars, dels):
    """Check whether ``array_var`` is created from a list of constants and,
    if so, inline the list definition as array initialization.

    ``expr`` must be a ``numpy.array`` call on a variable named in
    ``list_vars`` whose typed result is a one-dimensional
    ``types.ArrayCompatible``.  The replacement statements (size
    definitions, an ``np.empty`` call with an explicit dtype, one setitem
    per element, then ``dels``) are appended to ``stmts``.

    Returns True on success; raises GuardException when the pattern does
    not match.
    """
    callname = guard(find_callname, func_ir, expr)
    require(callname and callname[1] == 'numpy' and callname[0] == 'array')
    require(expr.args[0].name in list_vars)
    ret_type = calltypes[expr].return_type
    require(isinstance(ret_type, types.ArrayCompatible) and ret_type.ndim == 1)

    loc = expr.loc
    list_var = expr.args[0]
    # Type of the array to be created, and its element type.
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    # Sequence of values that will populate the new array.
    seq, _ = find_build_sequence(func_ir, list_var)
    size = len(seq)

    def define(target, value):
        # Append "target = value" and record it as a new definition.
        stmts.append(_new_definition(func_ir, target, value, loc))

    # Variables holding the array size and a one-element size tuple.
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    size_typ = types.intp
    typemap[size_var.name] = size_typ
    typemap[size_tuple_var.name] = types.UniTuple(size_typ, 1)
    define(size_var, ir.Const(size, loc=loc))
    define(size_tuple_var, ir.Expr.build_tuple(items=[size_var], loc=loc))

    # General approach: create an empty array, then fill its elements in
    # one by one.  First the numpy type passed to empty and a variable
    # holding the numpy empty function.
    nptype = types.DType(elem_typ)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature is not used afterwards; the call is kept.
    context.resolve_function_type(fnty, (size_typ,), {'dtype': nptype})
    typemap[empty_func.name] = fnty
    define(empty_func, ir.Global('empty', np.empty, loc=loc))

    # The dtype argument is obtained by a getattr of the dtype's name on
    # the numpy module, so a variable for the module comes first.
    g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    typemap[g_np_var.name] = types.misc.Module(np)
    define(g_np_var, ir.Global('np', np, loc))

    # Variable for the result of numpy.<dtype>.
    typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
    typemap[typ_var.name] = nptype
    dtype_name = str(elem_typ)
    if dtype_name == 'bool':
        dtype_name = 'bool_'
    define(typ_var, ir.Expr.getattr(g_np_var, dtype_name, loc))

    # array_var = empty(size, dtype)
    empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
    calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
    define(array_var, empty_call)

    # Fill the new array one element at a time.
    index_typ = types.intp
    for position, item in enumerate(seq):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = index_typ
        define(index_var, ir.Const(position, loc))
        setitem = ir.SetItem(array_var, index_var, item, loc)
        calltypes[setitem] = typing.signature(types.none, array_typ,
                                              index_typ, elem_typ)
        stmts.append(setitem)

    stmts.extend(dels)
    return True
def inline_array(array_var, expr, stmts, list_vars, dels):
    """Check whether ``array_var`` is created from a list of constants and,
    if so, inline the list definition as array initialization.

    ``expr`` must be a ``numpy.array`` call on a variable named in
    ``list_vars`` whose typed result is a one-dimensional
    ``types.ArrayCompatible``.  The replacement statements (size
    definitions, an ``np.empty`` call with an explicit dtype, one setitem
    per element, then ``dels``) are appended to ``stmts``.

    Returns True on success; raises GuardException when the pattern does
    not match.
    """
    callname = guard(find_callname, func_ir, expr)
    require(callname and callname[1] == 'numpy' and callname[0] == 'array')
    require(expr.args[0].name in list_vars)
    ret_type = calltypes[expr].return_type
    require(isinstance(ret_type, types.ArrayCompatible) and ret_type.ndim == 1)

    loc = expr.loc
    list_var = expr.args[0]
    # Type of the array to be created, and its element type.
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    # Sequence of values that will populate the new array.
    seq, _ = find_build_sequence(func_ir, list_var)
    size = len(seq)

    def define(target, value):
        # Append "target = value" and record it as a new definition.
        stmts.append(_new_definition(func_ir, target, value, loc))

    # Variables holding the array size and a one-element size tuple.
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    size_typ = types.intp
    typemap[size_var.name] = size_typ
    typemap[size_tuple_var.name] = types.UniTuple(size_typ, 1)
    define(size_var, ir.Const(size, loc=loc))
    define(size_tuple_var, ir.Expr.build_tuple(items=[size_var], loc=loc))

    # General approach: create an empty array, then fill its elements in
    # one by one.  First the numpy type passed to empty and a variable
    # holding the numpy empty function.
    nptype = types.DType(elem_typ)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature is not used afterwards; the call is kept.
    context.resolve_function_type(fnty, (size_typ, ), {'dtype': nptype})
    typemap[empty_func.name] = fnty
    define(empty_func, ir.Global('empty', np.empty, loc=loc))

    # The dtype argument is obtained by a getattr of the dtype's name on
    # the numpy module, so a variable for the module comes first.
    g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    typemap[g_np_var.name] = types.misc.Module(np)
    define(g_np_var, ir.Global('np', np, loc))

    # Variable for the result of numpy.<dtype>.
    typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
    typemap[typ_var.name] = nptype
    dtype_name = str(elem_typ)
    if dtype_name == 'bool':
        dtype_name = 'bool_'
    define(typ_var, ir.Expr.getattr(g_np_var, dtype_name, loc))

    # array_var = empty(size, dtype)
    empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
    calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
    define(array_var, empty_call)

    # Fill the new array one element at a time.
    index_typ = types.intp
    for position, item in enumerate(seq):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = index_typ
        define(index_var, ir.Const(position, loc))
        setitem = ir.SetItem(array_var, index_var, item, loc)
        calltypes[setitem] = typing.signature(types.none, array_typ,
                                              index_typ, elem_typ)
        stmts.append(setitem)

    stmts.extend(dels)
    return True
def get_slice_step(typemap, func_ir, var):
    """Return the third argument (the step) of the call that defines the
    slice variable ``var``.

    Raises GuardException (via ``require``) unless ``var`` has type
    ``types.slice3_type`` and is defined by a call expression; the call
    is asserted to have exactly three arguments.
    """
    require(typemap[var.name] == types.slice3_type)
    slice_call = get_definition(func_ir, var)
    is_call = isinstance(slice_call, ir.Expr) and slice_call.op == 'call'
    require(is_call)
    call_args = slice_call.args
    assert len(call_args) == 3
    return call_args[2]
def fix_array_assign(stmt):
    """For an assignment like lhs[idx] = rhs where both sides are arrays:

    1. find the definition of rhs, which has to be a call to
       numba.unsafe.ndarray.empty_inferred (possibly behind a cast);
    2. find the source array creation for lhs and append the dimensions
       of rhs to its size tuple;
    3. rewrite the definition of rhs in place from
       ``empty_inferred(...)`` to ``lhs[idx]``.

    Returns True on success; raises GuardException (via ``require``) when
    the statement does not match this pattern.
    """
    require(isinstance(stmt, ir.SetItem))
    require(isinstance(stmt.value, ir.Var))
    debug_print = _make_debug_print("fix_array_assign")
    debug_print("found SetItem: ", stmt)

    lhs = stmt.target
    # Source array creation of lhs.
    lhs_def = find_array_def(lhs)
    debug_print("found lhs_def: ", lhs_def)

    rhs_def = get_definition(func_ir, stmt.value)
    debug_print("found rhs_def: ", rhs_def)
    require(isinstance(rhs_def, ir.Expr))
    if rhs_def.op == 'cast':
        # Look through the cast to the underlying definition.
        rhs_def = get_definition(func_ir, rhs_def.value)
        require(isinstance(rhs_def, ir.Expr))
    require(_find_unsafe_empty_inferred(func_ir, rhs_def))

    # Array dimensions of rhs.
    dim_def = get_definition(func_ir, rhs_def.args[0])
    require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple')
    debug_print("dim_def = ", dim_def)
    extra_dims = [get_definition(func_ir, dim, lhs_only=True)
                  for dim in dim_def.items]
    debug_print("extra_dims = ", extra_dims)

    # Expand the size tuple used to create lhs with the extra dimensions.
    size_tuple_def = get_definition(func_ir, lhs_def.args[0])
    require(isinstance(size_tuple_def, ir.Expr)
            and size_tuple_def.op == 'build_tuple')
    debug_print("size_tuple_def = ", size_tuple_def)
    extra_dims = fix_dependencies(size_tuple_def, extra_dims)
    size_tuple_def.items += extra_dims

    # Modify rhs_def in place so that it becomes a getitem on lhs.
    rhs_def.op = 'getitem'
    rhs_def.value = get_definition(func_ir, lhs, lhs_only=True)
    rhs_def.index = stmt.index
    # Drop the fields that only belong to the former call expression.
    for call_field in ('func', 'args', 'vararg', 'kws'):
        del rhs_def._kws[call_field]
    return True
def _inline_arraycall(func_ir, cfg, visited, loop, enable_prange=False):
    """Look for array(list) call in the exit block of a given loop, and turn
    list operations into array operations in the loop if the following
    conditions are met:
      1. The exit block contains an array call on the list;
      2. The list variable is no longer live after array call;
      3. The list is created in the loop entry block;
      4. The loop is created from an range iterator whose length is known
         prior to the loop;
      5. There is only one list_append operation on the list variable in the
         loop body;
      6. The block that contains list_append dominates the loop head, which
         ensures list length is the same as loop length;
    If any condition check fails, no modification will be made to the
    incoming IR.

    Parameters
    ----------
    func_ir : the function IR to inspect and rewrite in place.
    cfg : control flow graph; queried for `in_loops` and `predecessors`.
    visited : collection of loop headers that were already processed, used
        to skip blocks that belong to sub-loops.
    loop : the loop under consideration (`exits`, `body`, `header` and
        `entries` are read).
    enable_prange : when True and the iterator comes from a global range
        function, that global is rewritten to `internal_prange`.

    Returns
    -------
    True when the rewrite was applied. Failed preconditions abort through
    `require`.
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(
        func_ir, func_ir.blocks[exit_block])

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if 'dtype' in array_kws:
        require(isinstance(array_kws['dtype'], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr,
        # and we'll remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws['dtype'])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr')
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast':
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(isinstance(list_var_def, ir.Expr)
            and list_var_def.op == 'build_list')

    # Look for list_append in "last" block in loop body, which should be a
    # block that is a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be
        # visited loops.
        in_visited_loops = [l.header in visited for l in cfg.in_loops(label)]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == 'call':
                func_def = get_definition(func_ir, expr.func)
                if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \
                        and func_def.attr == 'append':
                    list_def = get_definition(func_ir, func_def.value)
                    debug_print("list_def = ", list_def,
                                list_def == list_var_def)
                    if list_def == list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = set(l for l, b in cfg.predecessors(loop.header))
    debug_print("preds = ", preds, (loop.entries | set([append_block_label])))
    require(preds == (loop.entries | set([append_block_label])))

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == 'iternext':
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == 'pair_first':
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    iter_var = iter_vars[0]  # variable that holds the iterator object
    # variable that holds the value out of iterator
    iter_first_var = iter_first_vars[0]

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []

    def is_removed(val, removed):
        # True when `val` is a variable whose name matches one already
        # dropped from the entry block.
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False

    # Skip list construction and skip terminator, add the rest to stmts.
    # The comparison uses list_var_def (the definition of the list handed to
    # the array call). The `list_def` name bound while scanning for append
    # calls holds whatever receiver was scanned last, which may be a
    # different list when the loop appends to more than one.
    for i in range(len(loop_entry.body) - 1):
        stmt = loop_entry.body[i]
        if isinstance(stmt, ir.Assign) and (
                stmt.value == list_var_def
                or is_removed(stmt.value, removed)):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or
    # range(m, n), then the index_var correlates to iterator index; otherwise
    # we'll have to define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var)
    index_var = ir.Var(scope, mk_unique_var("index"), loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will
        # incremented in loop header
        stmts.append(_new_definition(func_ir, index_var,
                                     ir.Const(value=-1, loc=loc), loc))

    # Insert statement to get the size of the loop iterator
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(fn='-', lhs=stop, rhs=start, loc=loc)
        # we can parallelize this loop if enable_prange = True, by changing
        # range function from range, to prange.
        if enable_prange and isinstance(range_func_def, ir.Global):
            range_func_def.name = 'internal_prange'
            range_func_def.value = internal_prange
    else:
        len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc)
        stmts.append(_new_definition(
            func_ir, len_func_var,
            ir.Global('range_iter_len', range_iter_len, loc=loc), loc))
        size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    stmts.append(_new_definition(
        func_ir, size_tuple_var,
        ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # Insert array allocation
    array_var = ir.Var(scope, mk_unique_var("array"), loc)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call empty with dtype
        dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc)
        dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc)
        stmts.append(_new_definition(func_ir, dtype_mod_var,
                                     dtype_mod_def, loc))
        stmts.append(_new_definition(
            func_ir, dtype_var,
            ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc))
        stmts.append(_new_definition(
            func_ir, empty_func,
            ir.Global('empty', np.empty, loc=loc), loc))
        array_kws = [('dtype', dtype_var)]
    else:
        # otherwise we'll call unsafe_empty_inferred
        stmts.append(_new_definition(
            func_ir, empty_func,
            ir.Global('unsafe_empty_inferred', unsafe_empty_inferred,
                      loc=loc), loc))
        array_kws = []
    # array_var = empty_func(size_tuple_var)
    stmts.append(_new_definition(
        func_ir, array_var,
        ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws),
                     loc=loc), loc))

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert(isinstance(terminator, ir.Branch))
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(0, _new_definition(
                func_ir, index_var,
                ir.Expr.binop(fn='-', lhs=iter_first_var,
                              rhs=range_def[0], loc=loc), loc))
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc)
        one = ir.Var(scope, mk_unique_var("one"), loc)
        # one = 1
        stmts.append(_new_definition(func_ir, one,
                                     ir.Const(value=1, loc=loc), loc))
        # next_index_var = index_var + 1
        stmts.append(_new_definition(
            func_ir, next_index_var,
            ir.Expr.binop(fn='+', lhs=index_var, rhs=one, loc=loc), loc))
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var, loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i in range(len(append_block.body)):
        if append_block.body[i] == append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(
                target=array_var, index=index_var,
                value=append_stmt.value.args[0], loc=append_stmt.loc)

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True